You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2017/04/04 17:54:58 UTC
hive git commit: HIVE-15996: Implement multiargument GROUPING
function (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 90f2a047a -> 1a1e8357b
HIVE-15996: Implement multiargument GROUPING function (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1a1e8357
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1a1e8357
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1a1e8357
Branch: refs/heads/master
Commit: 1a1e8357bcb09ab7b775f26b83f00d6f687bbc23
Parents: 90f2a04
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Mon Feb 27 09:24:06 2017 +0000
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Tue Apr 4 19:54:16 2017 +0200
----------------------------------------------------------------------
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 80 ++--
.../hive/ql/udf/generic/GenericUDFGrouping.java | 45 +-
.../groupby_grouping_sets_grouping.q | 36 ++
.../vector_groupby_grouping_sets_grouping.q | 36 ++
.../groupby_grouping_sets_grouping.q.out | 366 ++++++++++++++++-
.../vector_groupby_grouping_sets_grouping.q.out | 410 ++++++++++++++++++-
.../results/clientpositive/perf/query36.q.out | 4 +-
.../results/clientpositive/perf/query70.q.out | 4 +-
.../results/clientpositive/perf/query86.q.out | 4 +-
9 files changed, 903 insertions(+), 82 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/1a1e8357/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index f2a6ade..b2e1c88 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -53,7 +53,6 @@ import org.antlr.runtime.tree.TreeWizard;
import org.antlr.runtime.tree.TreeWizard.ContextVisitor;
import org.apache.calcite.rel.RelNode;
import org.apache.commons.lang.StringUtils;
-import org.apache.commons.lang.mutable.MutableBoolean;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -3062,8 +3061,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
protected static ASTNode rewriteGroupingFunctionAST(final List<ASTNode> grpByAstExprs, ASTNode targetNode,
final boolean noneSet) throws SemanticException {
- final MutableBoolean visited = new MutableBoolean(false);
- final MutableBoolean found = new MutableBoolean(false);
TreeVisitorAction action = new TreeVisitorAction() {
@@ -3075,45 +3072,62 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
@Override
public Object post(Object t) {
ASTNode root = (ASTNode) t;
- if (root.getType() == HiveParser.TOK_FUNCTION && root.getChildCount() == 2) {
+ if (root.getType() == HiveParser.TOK_FUNCTION) {
ASTNode func = (ASTNode) ParseDriver.adaptor.getChild(root, 0);
- if (func.getText().equals("grouping")) {
- ASTNode c = (ASTNode) ParseDriver.adaptor.getChild(root, 1);
- visited.setValue(true);
- for (int i = 0; i < grpByAstExprs.size(); i++) {
- ASTNode grpByExpr = grpByAstExprs.get(i);
- if (grpByExpr.toStringTree().equals(c.toStringTree())) {
- ASTNode child1;
- if (noneSet) {
- // Query does not contain CUBE, ROLLUP, or GROUPING SETS, and thus,
- // grouping should return 0
- child1 = (ASTNode) ParseDriver.adaptor.create(HiveParser.IntegralLiteral,
- String.valueOf(0));
- } else {
- // We refer to grouping_id column
- child1 = (ASTNode) ParseDriver.adaptor.create(
- HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL");
- ParseDriver.adaptor.addChild(child1, ParseDriver.adaptor.create(
- HiveParser.Identifier, VirtualColumn.GROUPINGID.getName()));
+ if (func.getText().equals("grouping") && func.getChildCount() == 0) {
+ int numberOperands = ParseDriver.adaptor.getChildCount(root);
+ // We implement this logic using replaceChildren instead of replacing
+ // the root node itself because windowing logic stores multiple
+ // pointers to the AST, and replacing root might lead to some pointers
+ // leading to non-rewritten version
+ ASTNode newRoot = new ASTNode();
+ // Rewritten grouping function
+ ASTNode groupingFunc = (ASTNode) ParseDriver.adaptor.create(
+ HiveParser.Identifier, "grouping");
+ ParseDriver.adaptor.addChild(groupingFunc, ParseDriver.adaptor.create(
+ HiveParser.Identifier, "rewritten"));
+ newRoot.addChild(groupingFunc);
+ // Grouping ID reference
+ ASTNode childGroupingID;
+ if (noneSet) {
+ // Query does not contain CUBE, ROLLUP, or GROUPING SETS, and thus,
+ // grouping should return 0
+ childGroupingID = (ASTNode) ParseDriver.adaptor.create(HiveParser.IntegralLiteral,
+ String.valueOf(0));
+ } else {
+ // We refer to grouping_id column
+ childGroupingID = (ASTNode) ParseDriver.adaptor.create(
+ HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL");
+ ParseDriver.adaptor.addChild(childGroupingID, ParseDriver.adaptor.create(
+ HiveParser.Identifier, VirtualColumn.GROUPINGID.getName()));
+ }
+ newRoot.addChild(childGroupingID);
+ // Indices
+ for (int i = 1; i < numberOperands; i++) {
+ ASTNode c = (ASTNode) ParseDriver.adaptor.getChild(root, i);
+ for (int j = 0; j < grpByAstExprs.size(); j++) {
+ ASTNode grpByExpr = grpByAstExprs.get(j);
+ if (grpByExpr.toStringTree().equals(c.toStringTree())) {
+ // Create and add AST node with position of grouping function input
+ // in group by clause
+ ASTNode childN = (ASTNode) ParseDriver.adaptor.create(HiveParser.IntegralLiteral,
+ String.valueOf(IntMath.mod(-j-1, grpByAstExprs.size())));
+ newRoot.addChild(childN);
+ break;
}
- ASTNode child2 = (ASTNode) ParseDriver.adaptor.create(HiveParser.IntegralLiteral,
- String.valueOf(IntMath.mod(-i-1, grpByAstExprs.size())));
- root.setChild(1, child1);
- root.addChild(child2);
- found.setValue(true);
- break;
}
}
+ if (numberOperands + 1 != ParseDriver.adaptor.getChildCount(newRoot)) {
+ throw new RuntimeException(ErrorMsg.HIVE_GROUPING_FUNCTION_EXPR_NOT_IN_GROUPBY.getMsg());
+ }
+ // Replace expression
+ root.replaceChildren(0, numberOperands - 1, newRoot);
}
}
return t;
}
};
- ASTNode newTargetNode = (ASTNode) new TreeVisitor(ParseDriver.adaptor).visit(targetNode, action);
- if (visited.booleanValue() && !found.booleanValue()) {
- throw new SemanticException(ErrorMsg.HIVE_GROUPING_FUNCTION_EXPR_NOT_IN_GROUPBY.getMsg());
- }
- return newTargetNode;
+ return (ASTNode) new TreeVisitor(ParseDriver.adaptor).visit(targetNode, action);
}
private Operator genPlanForSubQueryPredicate(
http://git-wip-us.apache.org/repos/asf/hive/blob/1a1e8357/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFGrouping.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFGrouping.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFGrouping.java
index cc01526..cee0e14 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFGrouping.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFGrouping.java
@@ -23,7 +23,6 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
-import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
@@ -32,27 +31,29 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspecto
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantIntObjectInspector;
+import org.apache.hadoop.io.IntWritable;
+
+import com.google.common.math.IntMath;
/**
* UDF grouping
*/
@Description(name = "grouping",
-value = "_FUNC_(a, b) - Indicates whether a specified column expression in "
+value = "_FUNC_(a, p1, ..., pn) - Indicates whether a specified column expression "
+ "is aggregated or not. Returns 1 for aggregated or 0 for not aggregated. ",
-extended = "a is the grouping id, b is the index we want to extract")
+extended = "a is the grouping id, p1...pn are the indices we want to extract")
@UDFType(deterministic = true)
-@NDV(maxNdv = 2)
public class GenericUDFGrouping extends GenericUDF {
private transient IntObjectInspector groupingIdOI;
- private int index = 0;
- private ByteWritable byteWritable = new ByteWritable();
+ private int[] indices;
+ private IntWritable intWritable = new IntWritable();
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
- if (arguments.length != 2) {
+ if (arguments.length < 2) {
throw new UDFArgumentLengthException(
- "grouping() requires 2 argument, got " + arguments.length);
- "grouping() requires at least 2 arguments, got " + arguments.length);
}
if (arguments[0].getCategory() != Category.PRIMITIVE) {
@@ -64,27 +65,37 @@ public class GenericUDFGrouping extends GenericUDF {
}
groupingIdOI = (IntObjectInspector) arguments[0];
- PrimitiveObjectInspector arg2OI = (PrimitiveObjectInspector) arguments[1];
- if (!(arg2OI instanceof WritableConstantIntObjectInspector)) {
- throw new UDFArgumentTypeException(1, "The second argument to grouping() must be a constant");
+ indices = new int[arguments.length - 1];
+ for (int i = 1; i < arguments.length; i++) {
+ PrimitiveObjectInspector arg2OI = (PrimitiveObjectInspector) arguments[i];
+ if (!(arg2OI instanceof WritableConstantIntObjectInspector)) {
+ throw new UDFArgumentTypeException(i, "Must be a constant");
+ }
+ indices[i - 1] = ((WritableConstantIntObjectInspector)arg2OI).getWritableConstantValue().get();
}
- index = ((WritableConstantIntObjectInspector)arg2OI).getWritableConstantValue().get();
- return PrimitiveObjectInspectorFactory.writableByteObjectInspector;
+ return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
}
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
// groupingId = PrimitiveObjectInspectorUtils.getInt(arguments[0].get(), groupingIdOI);
// Check that the bit at the given index is '1' or '0'
- byteWritable.set((byte)
- ((PrimitiveObjectInspectorUtils.getInt(arguments[0].get(), groupingIdOI) >> index) & 1));
- return byteWritable;
+ int result = 0;
+ // grouping(c1, c2, c3)
+ // is equivalent to
+ // 4 * grouping(c1) + 2 * grouping(c2) + grouping(c3)
+ for (int a = 1; a < arguments.length; a++) {
+ result += IntMath.pow(2, indices.length - a) *
+ ((PrimitiveObjectInspectorUtils.getInt(arguments[0].get(), groupingIdOI) >> indices[a - 1]) & 1);
+ }
+ intWritable.set(result);
+ return intWritable;
}
@Override
public String getDisplayString(String[] children) {
- assert (children.length == 2);
+ assert (children.length > 1);
return getStandardDisplayString("grouping", children);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/1a1e8357/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
index 34759ca..7157106 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
@@ -119,3 +119,39 @@ select key, value
from T1
group by key, value
having grouping(key) = 0;
+
+explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value);
+
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value);
+
+explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value);
+
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value);
+
+explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value);
+
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value);
+
+explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value);
+
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value);
http://git-wip-us.apache.org/repos/asf/hive/blob/1a1e8357/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q b/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q
index 7b7c892..b9c2a7b 100644
--- a/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q
+++ b/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q
@@ -97,3 +97,39 @@ from T1
group by cube(key, value)
having grouping(key) = 1 OR grouping(value) = 1
order by x desc, case when x = 1 then key end;
+
+explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value);
+
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value);
+
+explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value);
+
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value);
+
+explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value);
+
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value);
+
+explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value);
+
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value);
http://git-wip-us.apache.org/repos/asf/hive/blob/1a1e8357/ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out b/ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out
index b82d9c2..473d17a 100644
--- a/ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out
+++ b/ql/src/test/results/clientpositive/groupby_grouping_sets_grouping.q.out
@@ -56,7 +56,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint)
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: int), grouping(_col2, 0) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -138,7 +138,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint)
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: int), grouping(_col2, 0) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -227,7 +227,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (UDFToInteger(grouping(_col2, 1)) = 1) (type: boolean)
+ predicate: (grouping(_col2, 1) = 1) (type: boolean)
Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col1 (type: int)
@@ -314,10 +314,10 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((UDFToInteger(grouping(_col2, 1)) = 1) or (UDFToInteger(grouping(_col2, 0)) = 1)) (type: boolean)
+ predicate: ((grouping(_col2, 1) = 1) or (grouping(_col2, 0) = 1)) (type: boolean)
Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: tinyint)
+ expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -332,13 +332,13 @@ STAGE PLANS:
Map Operator Tree:
TableScan
Reduce Output Operator
- key expressions: _col2 (type: tinyint), CASE WHEN ((_col2 = 1)) THEN (_col0) ELSE (null) END (type: int)
+ key expressions: _col2 (type: int), CASE WHEN ((_col2 = 1)) THEN (_col0) ELSE (null) END (type: int)
sort order: -+
Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: int)
Reduce Operator Tree:
Select Operator
- expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: tinyint)
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -423,7 +423,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint)
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: int), grouping(_col2, 0) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -505,7 +505,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint)
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: int), grouping(_col2, 0) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -681,7 +681,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: tinyint)
+ expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -696,13 +696,13 @@ STAGE PLANS:
Map Operator Tree:
TableScan
Reduce Output Operator
- key expressions: _col2 (type: tinyint), CASE WHEN ((_col2 = 1)) THEN (_col0) END (type: int)
+ key expressions: _col2 (type: int), CASE WHEN ((_col2 = 1)) THEN (_col0) END (type: int)
sort order: -+
Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: int)
Reduce Operator Tree:
Select Operator
- expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: tinyint)
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -787,7 +787,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col1 (type: int), 0 (type: tinyint), 0 (type: tinyint)
+ expressions: _col0 (type: int), _col1 (type: int), 0 (type: int), 0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -864,7 +864,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col1 (type: int), 0 (type: tinyint)
+ expressions: _col0 (type: int), _col1 (type: int), 0 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -976,3 +976,341 @@ POSTHOOK: Input: default@t1
3 3
3 NULL
4 5
+PREHOOK: query: explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int)
+ outputColumnNames: key, value
+ Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: int), value (type: int), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1, 0) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1 1 0 0
+1 NULL 0 0
+1 NULL 1 1
+2 2 0 0
+2 NULL 1 1
+3 3 0 0
+3 NULL 0 0
+3 NULL 1 1
+4 5 0 0
+4 NULL 1 1
+NULL 1 2 2
+NULL 2 2 2
+NULL 3 2 2
+NULL 5 2 2
+NULL NULL 2 2
+NULL NULL 3 3
+PREHOOK: query: explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int)
+ outputColumnNames: key, value
+ Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: int), value (type: int), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 12 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 0, 1) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1 1 0 0
+1 NULL 0 0
+1 NULL 1 2
+2 2 0 0
+2 NULL 1 2
+3 3 0 0
+3 NULL 0 0
+3 NULL 1 2
+4 5 0 0
+4 NULL 1 2
+NULL 1 2 1
+NULL 2 2 1
+NULL 3 2 1
+NULL 5 2 1
+NULL NULL 2 1
+NULL NULL 3 3
+PREHOOK: query: explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int)
+ outputColumnNames: key, value
+ Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: int), value (type: int), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1, 0) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1 1 0 0
+1 NULL 0 0
+1 NULL 1 1
+2 2 0 0
+2 NULL 1 1
+3 3 0 0
+3 NULL 0 0
+3 NULL 1 1
+4 5 0 0
+4 NULL 1 1
+NULL NULL 3 3
+PREHOOK: query: explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int)
+ outputColumnNames: key, value
+ Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: int), value (type: int), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 0, 1) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 4 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1 1 0 0
+1 NULL 0 0
+1 NULL 1 2
+2 2 0 0
+2 NULL 1 2
+3 3 0 0
+3 NULL 0 0
+3 NULL 1 2
+4 5 0 0
+4 NULL 1 2
+NULL NULL 3 3
http://git-wip-us.apache.org/repos/asf/hive/blob/1a1e8357/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out
index 8696207..5af9e61 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out
@@ -80,7 +80,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint)
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: int), grouping(_col2, 0) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -174,7 +174,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint)
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: int), grouping(_col2, 0) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -275,7 +275,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (UDFToInteger(grouping(_col2, 1)) = 1) (type: boolean)
+ predicate: (grouping(_col2, 1) = 1) (type: boolean)
Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col1 (type: int)
@@ -374,14 +374,14 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((UDFToInteger(grouping(_col2, 1)) = 1) or (UDFToInteger(grouping(_col2, 0)) = 1)) (type: boolean)
+ predicate: ((grouping(_col2, 1) = 1) or (grouping(_col2, 0) = 1)) (type: boolean)
Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: tinyint)
+ expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col2 (type: tinyint), CASE WHEN ((_col2 = 1)) THEN (_col0) ELSE (null) END (type: int)
+ key expressions: _col2 (type: int), CASE WHEN ((_col2 = 1)) THEN (_col0) ELSE (null) END (type: int)
sort order: -+
Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: int)
@@ -389,7 +389,7 @@ STAGE PLANS:
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
- expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: tinyint)
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -486,7 +486,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint)
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: int), grouping(_col2, 0) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -580,7 +580,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: tinyint), grouping(_col2, 0) (type: tinyint)
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1) (type: int), grouping(_col2, 0) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -780,11 +780,11 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: tinyint)
+ expressions: _col0 (type: int), _col1 (type: int), (grouping(_col2, 1) + grouping(_col2, 0)) (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col2 (type: tinyint), CASE WHEN ((_col2 = 1)) THEN (_col0) END (type: int)
+ key expressions: _col2 (type: int), CASE WHEN ((_col2 = 1)) THEN (_col0) END (type: int)
sort order: -+
Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: int)
@@ -792,7 +792,7 @@ STAGE PLANS:
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
- expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: tinyint)
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -836,3 +836,389 @@ NULL 3 1
NULL 5 1
NULL NULL 1
NULL NULL 2
+PREHOOK: query: explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value)
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int)
+ outputColumnNames: key, value
+ Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: int), value (type: int), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1, 0) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by cube(key, value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+key value grouping__id _c3
+1 1 0 0
+1 NULL 0 0
+1 NULL 1 1
+2 2 0 0
+2 NULL 1 1
+3 3 0 0
+3 NULL 0 0
+3 NULL 1 1
+4 5 0 0
+4 NULL 1 1
+NULL 1 2 2
+NULL 2 2 2
+NULL 3 2 2
+NULL 5 2 2
+NULL NULL 2 2
+NULL NULL 3 3
+PREHOOK: query: explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value)
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int)
+ outputColumnNames: key, value
+ Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: int), value (type: int), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 24 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 0, 1) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by cube(key, value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+key value grouping__id _c3
+1 1 0 0
+1 NULL 0 0
+1 NULL 1 2
+2 2 0 0
+2 NULL 1 2
+3 3 0 0
+3 NULL 0 0
+3 NULL 1 2
+4 5 0 0
+4 NULL 1 2
+NULL 1 2 1
+NULL 2 2 1
+NULL 3 2 1
+NULL 5 2 1
+NULL NULL 2 1
+NULL NULL 3 3
+PREHOOK: query: explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value)
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int)
+ outputColumnNames: key, value
+ Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: int), value (type: int), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 18 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 18 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 1, 0) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, `grouping__id`, grouping(key, value)
+from T1
+group by rollup(key, value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+key value grouping__id _c3
+1 1 0 0
+1 NULL 0 0
+1 NULL 1 1
+2 2 0 0
+2 NULL 1 1
+3 3 0 0
+3 NULL 0 0
+3 NULL 1 1
+4 5 0 0
+4 NULL 1 1
+NULL NULL 3 3
+PREHOOK: query: explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value)
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: int)
+ outputColumnNames: key, value
+ Statistics: Num rows: 6 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: key (type: int), value (type: int), 0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 18 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ Statistics: Num rows: 18 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), grouping(_col2, 0, 1) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 9 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, `grouping__id`, grouping(value, key)
+from T1
+group by rollup(key, value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+key value grouping__id _c3
+1 1 0 0
+1 NULL 0 0
+1 NULL 1 2
+2 2 0 0
+2 NULL 1 2
+3 3 0 0
+3 NULL 0 0
+3 NULL 1 2
+4 5 0 0
+4 NULL 1 2
+NULL NULL 3 3
http://git-wip-us.apache.org/repos/asf/hive/blob/1a1e8357/ql/src/test/results/clientpositive/perf/query36.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query36.q.out b/ql/src/test/results/clientpositive/perf/query36.q.out
index b356628..57ab26a 100644
--- a/ql/src/test/results/clientpositive/perf/query36.q.out
+++ b/ql/src/test/results/clientpositive/perf/query36.q.out
@@ -81,12 +81,12 @@ Stage-0
Select Operator [SEL_30] (rows=1149975358 width=88)
Output:["_col0","_col1","_col2","_col3","_col4"]
PTF Operator [PTF_29] (rows=1149975358 width=88)
- Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(_col4 / _col5) ASC NULLS FIRST","partition by:":"(grouping(_col6, 1) + grouping(_col6, 0)), CASE WHEN ((UDFToInteger(grouping(_col6, 0)) = 0)) THEN (_col0) ELSE (null) END"}]
+ Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(_col4 / _col5) ASC NULLS FIRST","partition by:":"(grouping(_col6, 1) + grouping(_col6, 0)), CASE WHEN ((grouping(_col6, 0) = 0)) THEN (_col0) ELSE (null) END"}]
Select Operator [SEL_28] (rows=1149975358 width=88)
Output:["_col0","_col1","_col4","_col5","_col6"]
<-Reducer 5 [SIMPLE_EDGE]
SHUFFLE [RS_27]
- PartitionCols:(grouping(_col6, 1) + grouping(_col6, 0)), CASE WHEN ((UDFToInteger(grouping(_col6, 0)) = 0)) THEN (_col0) ELSE (null) END
+ PartitionCols:(grouping(_col6, 1) + grouping(_col6, 0)), CASE WHEN ((grouping(_col6, 0) = 0)) THEN (_col0) ELSE (null) END
Select Operator [SEL_26] (rows=1149975358 width=88)
Output:["_col0","_col1","_col4","_col5","_col6"]
Group By Operator [GBY_25] (rows=1149975358 width=88)
http://git-wip-us.apache.org/repos/asf/hive/blob/1a1e8357/ql/src/test/results/clientpositive/perf/query70.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query70.q.out b/ql/src/test/results/clientpositive/perf/query70.q.out
index bf90cdd..96fba40 100644
--- a/ql/src/test/results/clientpositive/perf/query70.q.out
+++ b/ql/src/test/results/clientpositive/perf/query70.q.out
@@ -101,12 +101,12 @@ Stage-0
Select Operator [SEL_57] (rows=1149975358 width=88)
Output:["_col0","_col1","_col2","_col3","_col4"]
PTF Operator [PTF_56] (rows=1149975358 width=88)
- Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 DESC NULLS LAST","partition by:":"(grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((UDFToInteger(grouping(_col5, 0)) = 0)) THEN (_col0) ELSE (null) END"}]
+ Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 DESC NULLS LAST","partition by:":"(grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (null) END"}]
Select Operator [SEL_55] (rows=1149975358 width=88)
Output:["_col0","_col1","_col4","_col5"]
<-Reducer 5 [SIMPLE_EDGE]
SHUFFLE [RS_54]
- PartitionCols:(grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((UDFToInteger(grouping(_col5, 0)) = 0)) THEN (_col0) ELSE (null) END
+ PartitionCols:(grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (null) END
Select Operator [SEL_53] (rows=1149975358 width=88)
Output:["_col0","_col1","_col4","_col5"]
Group By Operator [GBY_52] (rows=1149975358 width=88)
http://git-wip-us.apache.org/repos/asf/hive/blob/1a1e8357/ql/src/test/results/clientpositive/perf/query86.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query86.q.out b/ql/src/test/results/clientpositive/perf/query86.q.out
index 6377c43..734e6a4 100644
--- a/ql/src/test/results/clientpositive/perf/query86.q.out
+++ b/ql/src/test/results/clientpositive/perf/query86.q.out
@@ -72,12 +72,12 @@ Stage-0
Select Operator [SEL_24] (rows=261364852 width=135)
Output:["_col0","_col1","_col2","_col3","_col4"]
PTF Operator [PTF_23] (rows=261364852 width=135)
- Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 DESC NULLS LAST","partition by:":"(grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((UDFToInteger(grouping(_col5, 0)) = 0)) THEN (_col0) ELSE (null) END"}]
+ Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 DESC NULLS LAST","partition by:":"(grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (null) END"}]
Select Operator [SEL_22] (rows=261364852 width=135)
Output:["_col0","_col1","_col4","_col5"]
<-Reducer 4 [SIMPLE_EDGE]
SHUFFLE [RS_21]
- PartitionCols:(grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((UDFToInteger(grouping(_col5, 0)) = 0)) THEN (_col0) ELSE (null) END
+ PartitionCols:(grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((grouping(_col5, 0) = 0)) THEN (_col0) ELSE (null) END
Select Operator [SEL_20] (rows=261364852 width=135)
Output:["_col0","_col1","_col4","_col5"]
Group By Operator [GBY_19] (rows=261364852 width=135)