You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2019/03/16 15:51:32 UTC
[hive] branch master updated: HIVE-16924 : Support distinct in presence of Group By (Miklos Gergely via Zoltan Haindrich)
This is an automated email from the ASF dual-hosted git repository.
hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 36bd89d HIVE-16924 : Support distinct in presence of Group By (Miklos Gergely via Zoltan Haindrich)
36bd89d is described below
commit 36bd89d2e2f29f84fe646499a3a1ca12e435ccdf
Author: Miklos Gergely <mg...@hortonworks.com>
AuthorDate: Sat Mar 16 08:49:40 2019 -0700
HIVE-16924 : Support distinct in presence of Group By (Miklos Gergely via Zoltan Haindrich)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
---
.../java/org/apache/hadoop/hive/ql/ErrorMsg.java | 2 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 243 +--
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 70 +-
.../clientnegative/selectDistinctStarNeg_2.q | 4 -
.../wrong_distinct_group_by_without_cbo.q | 5 +
.../test/queries/clientpositive/distinct_groupby.q | 76 +
ql/src/test/queries/negative/wrong_distinct1.q | 3 -
.../clientnegative/distinct_missing_groupby.q.out | 1 +
.../clientnegative/selectDistinctStarNeg_2.q.out | 1 -
.../clientnegative/udaf_invalid_place.q.out | 1 -
.../wrong_distinct_group_by_without_cbo.q.out | 1 +
.../results/clientpositive/distinct_groupby.q.out | 2185 ++++++++++++++++++++
.../results/compiler/errors/wrong_distinct1.q.out | 2 -
13 files changed, 2431 insertions(+), 163 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 83053d1..554df3c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -93,7 +93,7 @@ public enum ErrorMsg {
INVALID_MAPINDEX_CONSTANT(10031, "Non-constant expression for map indexes not supported"),
INVALID_MAPINDEX_TYPE(10032, "MAP key type does not match index expression type"),
NON_COLLECTION_TYPE(10033, "[] not valid on non-collection types"),
- SELECT_DISTINCT_WITH_GROUPBY(10034, "SELECT DISTINCT and GROUP BY can not be in the same query"),
+ @Deprecated SELECT_DISTINCT_WITH_GROUPBY(10034, "SELECT DISTINCT and GROUP BY can not be in the same query"),
COLUMN_REPEATED_IN_PARTITIONING_COLS(10035, "Column repeated in partitioning columns"),
DUPLICATE_COLUMN_NAMES(10036, "Duplicate column name:"),
INVALID_BUCKET_NUMBER(10037, "Bucket number should be bigger than zero"),
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index f0b6f8f..98e94e2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -303,6 +303,8 @@ import java.util.Properties;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
import javax.sql.DataSource;
@@ -864,7 +866,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
// Now check QB in more detail. canHandleQbForCbo returns null if query can
// be handled.
- msg = CalcitePlanner.canHandleQbForCbo(queryProperties, conf, true, needToLogMessage, qb);
+ msg = CalcitePlanner.canHandleQbForCbo(queryProperties, conf, true, needToLogMessage);
if (msg == null) {
return Pair.of(true, msg);
}
@@ -892,8 +894,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
* Query<br>
* 2. Nested Subquery will return false for qbToChk.getIsQuery()
*/
- static String canHandleQbForCbo(QueryProperties queryProperties, HiveConf conf,
- boolean topLevelQB, boolean verbose, QB qb) {
+ private static String canHandleQbForCbo(QueryProperties queryProperties, HiveConf conf,
+ boolean topLevelQB, boolean verbose) {
if (!queryProperties.hasClusterBy() && !queryProperties.hasDistributeBy()
&& !queryProperties.hasSortBy() && !queryProperties.hasPTF() && !queryProperties.usesScript()
@@ -2523,8 +2525,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
throw new SemanticException(ErrorMsg.UNSUPPORTED_SET_OPERATOR.getMsg(opcode.toString()));
}
relToHiveRR.put(setOpRel, setOpOutRR);
- relToHiveColNameCalcitePosMap.put(setOpRel,
- this.buildHiveToCalciteColumnMap(setOpOutRR, setOpRel));
+ relToHiveColNameCalcitePosMap.put(setOpRel, buildHiveToCalciteColumnMap(setOpOutRR));
return setOpRel;
}
@@ -2662,7 +2663,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
if (!RowResolver.add(joinRR, newLeftRR)) {
LOG.warn("Duplicates detected when adding columns to RR: see previous message");
}
- relToHiveColNameCalcitePosMap.put(topRel, this.buildHiveToCalciteColumnMap(joinRR, topRel));
+ relToHiveColNameCalcitePosMap.put(topRel, buildHiveToCalciteColumnMap(joinRR));
relToHiveRR.put(topRel, joinRR);
// Introduce top project operator to remove additional column(s) that have
@@ -2693,7 +2694,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
// 4. Add new rel & its RR to the maps
- relToHiveColNameCalcitePosMap.put(topRel, this.buildHiveToCalciteColumnMap(topRR, topRel));
+ relToHiveColNameCalcitePosMap.put(topRel, buildHiveToCalciteColumnMap(topRR));
relToHiveRR.put(topRel, topRR);
return topRel;
}
@@ -2746,15 +2747,14 @@ public class CalcitePlanner extends SemanticAnalyzer {
if ((left.getToken().getType() == HiveParser.TOK_TABREF)
|| (left.getToken().getType() == HiveParser.TOK_SUBQUERY)
|| (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) {
- String tableName = SemanticAnalyzer.getUnescapedUnqualifiedTableName(
- (ASTNode) left.getChild(0)).toLowerCase();
- leftTableAlias = left.getChildCount() == 1 ? tableName : SemanticAnalyzer
- .unescapeIdentifier(left.getChild(left.getChildCount() - 1).getText().toLowerCase());
+ String tableName = getUnescapedUnqualifiedTableName((ASTNode) left.getChild(0)).toLowerCase();
+ leftTableAlias = left.getChildCount() == 1 ? tableName :
+ unescapeIdentifier(left.getChild(left.getChildCount() - 1).getText().toLowerCase());
// ptf node form is: ^(TOK_PTBLFUNCTION $name $alias?
// partitionTableFunctionSource partitioningSpec? expression*)
// guranteed to have an lias here: check done in processJoin
- leftTableAlias = (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? SemanticAnalyzer
- .unescapeIdentifier(left.getChild(1).getText().toLowerCase()) : leftTableAlias;
+ leftTableAlias = (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ?
+ unescapeIdentifier(left.getChild(1).getText().toLowerCase()) : leftTableAlias;
leftRel = aliasToRel.get(leftTableAlias);
} else if (SemanticAnalyzer.isJoinToken(left)) {
leftRel = genJoinLogicalPlan(left, aliasToRel);
@@ -2770,15 +2770,14 @@ public class CalcitePlanner extends SemanticAnalyzer {
if ((right.getToken().getType() == HiveParser.TOK_TABREF)
|| (right.getToken().getType() == HiveParser.TOK_SUBQUERY)
|| (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) {
- String tableName = SemanticAnalyzer.getUnescapedUnqualifiedTableName(
- (ASTNode) right.getChild(0)).toLowerCase();
- rightTableAlias = right.getChildCount() == 1 ? tableName : SemanticAnalyzer
- .unescapeIdentifier(right.getChild(right.getChildCount() - 1).getText().toLowerCase());
+ String tableName = getUnescapedUnqualifiedTableName((ASTNode) right.getChild(0)).toLowerCase();
+ rightTableAlias = right.getChildCount() == 1 ? tableName :
+ unescapeIdentifier(right.getChild(right.getChildCount() - 1).getText().toLowerCase());
// ptf node form is: ^(TOK_PTBLFUNCTION $name $alias?
// partitionTableFunctionSource partitioningSpec? expression*)
// guranteed to have an lias here: check done in processJoin
- rightTableAlias = (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? SemanticAnalyzer
- .unescapeIdentifier(right.getChild(1).getText().toLowerCase()) : rightTableAlias;
+ rightTableAlias = (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ?
+ unescapeIdentifier(right.getChild(1).getText().toLowerCase()) : rightTableAlias;
rightRel = aliasToRel.get(rightTableAlias);
} else if (right.getToken().getType() == HiveParser.TOK_LATERAL_VIEW) {
rightRel = genLateralViewPlans(right, aliasToRel);
@@ -2819,7 +2818,6 @@ public class CalcitePlanner extends SemanticAnalyzer {
// Virtual Cols
// 3.1 Add Column info for non partion cols (Object Inspector fields)
- @SuppressWarnings("deprecation")
StructObjectInspector rowObjectInspector = (StructObjectInspector) tabMetaData.getDeserializer()
.getObjectInspector();
List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
@@ -2998,8 +2996,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
// 6. Add Schema(RR) to RelNode-Schema map
- ImmutableMap<String, Integer> hiveToCalciteColMap = buildHiveToCalciteColumnMap(rr,
- tableRel);
+ ImmutableMap<String, Integer> hiveToCalciteColMap = buildHiveToCalciteColumnMap(rr);
relToHiveRR.put(tableRel, rr);
relToHiveColNameCalcitePosMap.put(tableRel, hiveToCalciteColMap);
} catch (Exception e) {
@@ -3186,17 +3183,16 @@ public class CalcitePlanner extends SemanticAnalyzer {
case HiveParser.TOK_TABREF:
case HiveParser.TOK_SUBQUERY:
case HiveParser.TOK_PTBLFUNCTION:
- String inputTableName = SemanticAnalyzer.getUnescapedUnqualifiedTableName(
- (ASTNode) next.getChild(0)).toLowerCase();
+ String inputTableName = getUnescapedUnqualifiedTableName((ASTNode) next.getChild(0)).toLowerCase();
String inputTableAlias;
if (next.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
// ptf node form is: ^(TOK_PTBLFUNCTION $name $alias?
// partitionTableFunctionSource partitioningSpec? expression*)
// ptf node guaranteed to have an alias here
- inputTableAlias = SemanticAnalyzer.unescapeIdentifier(next.getChild(1).getText().toLowerCase());
+ inputTableAlias = unescapeIdentifier(next.getChild(1).getText().toLowerCase());
} else {
inputTableAlias = next.getChildCount() == 1 ? inputTableName :
- SemanticAnalyzer.unescapeIdentifier(next.getChild(next.getChildCount() - 1).getText().toLowerCase());
+ unescapeIdentifier(next.getChild(next.getChildCount() - 1).getText().toLowerCase());
}
inputRel = aliasToRel.get(inputTableAlias);
break;
@@ -3233,8 +3229,6 @@ public class CalcitePlanner extends SemanticAnalyzer {
(ASTNode) functionCall.getChild(1);
// Output types. They will be the concatenation of the input refs types and
// the types of the expressions for the lateral view generated rows
- List<RelDataType> outputFieldTypes = new ArrayList<>(inputRefsTypes);
- List<String> outputFieldNames = new ArrayList<>(inputRel.getRowType().getFieldNames());
// Generate all expressions from lateral view
ExprNodeDesc valuesExpr = genExprNodeDesc(valuesClause, inputRR, false);
RexCall convertedOriginalValuesExpr = (RexCall) new RexNodeConverter(this.cluster, inputRel.getRowType(),
@@ -3306,7 +3300,6 @@ public class CalcitePlanner extends SemanticAnalyzer {
columnAliases.add(SemanticAnalyzer.getColumnInternalName(i));
}
}
- int numInputExprs = inputRR.getColumnInfos().size();
ListTypeInfo listTypeInfo = (ListTypeInfo) valuesExpr.getTypeInfo(); // Array should have ListTypeInfo
StructTypeInfo typeInfos = (StructTypeInfo) listTypeInfo.getListElementTypeInfo(); // Within the list, we extract types
for (int i = 0, j = 0; i < columnAliases.size(); i++) {
@@ -3318,8 +3311,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
new ColumnInfo(internalColName, typeInfos.getAllStructFieldTypeInfos().get(i),
tableAlias, false));
}
- this.relToHiveColNameCalcitePosMap
- .put(htfsRel, buildHiveToCalciteColumnMap(outputRR, htfsRel));
+ this.relToHiveColNameCalcitePosMap.put(htfsRel, buildHiveToCalciteColumnMap(outputRR));
this.relToHiveRR.put(htfsRel, outputRR);
// 4) Return new operator
@@ -3391,8 +3383,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel,
- Map<String, RelNode> aliasToRel, ImmutableMap<String, Integer> outerNameToPosMap,
- RowResolver outerRR, boolean forHavingClause) throws SemanticException {
+ ImmutableMap<String, Integer> outerNameToPosMap, RowResolver outerRR, boolean forHavingClause)
+ throws SemanticException {
Map<ASTNode, RelNode> subQueryToRelNode = new HashMap<>();
boolean isSubQuery = genSubQueryRelNode(qb, searchCond, srcRel, forHavingClause,
@@ -3419,39 +3411,15 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
}
- private RelNode projectLeftOuterSide(RelNode srcRel, int numColumns) throws SemanticException {
- RowResolver iRR = relToHiveRR.get(srcRel);
- RowResolver oRR = new RowResolver();
- RowResolver.add(oRR, iRR, numColumns);
-
- List<RexNode> calciteColLst = new ArrayList<RexNode>();
- List<String> oFieldNames = new ArrayList<String>();
- RelDataType iType = srcRel.getRowType();
-
- for (int i = 0; i < iType.getFieldCount(); i++) {
- RelDataTypeField fType = iType.getFieldList().get(i);
- String fName = iType.getFieldNames().get(i);
- calciteColLst.add(cluster.getRexBuilder().makeInputRef(fType.getType(), i));
- oFieldNames.add(fName);
- }
-
- HiveRelNode selRel = HiveProject.create(srcRel, calciteColLst, oFieldNames);
-
- this.relToHiveColNameCalcitePosMap.put(selRel, buildHiveToCalciteColumnMap(oRR, selRel));
- this.relToHiveRR.put(selRel, oRR);
- return selRel;
- }
-
- private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, Map<String, RelNode> aliasToRel,
- ImmutableMap<String, Integer> outerNameToPosMap, RowResolver outerRR,
- boolean forHavingClause) throws SemanticException {
+ private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, ImmutableMap<String, Integer> outerNameToPosMap,
+ RowResolver outerRR, boolean forHavingClause) throws SemanticException {
RelNode filterRel = null;
Iterator<ASTNode> whereClauseIterator = getQBParseInfo(qb).getDestToWhereExpr().values()
.iterator();
if (whereClauseIterator.hasNext()) {
filterRel = genFilterRelNode(qb, (ASTNode) whereClauseIterator.next().getChild(0), srcRel,
- aliasToRel, outerNameToPosMap, outerRR, forHavingClause);
+ outerNameToPosMap, outerRR, forHavingClause);
}
return filterRel;
@@ -3475,9 +3443,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
}
- private AggregateCall convertGBAgg(AggInfo agg, RelNode input, List<RexNode> gbChildProjLst,
- RexNodeConverter converter, HashMap<String, Integer> rexNodeToPosMap,
- Integer childProjLstIndx) throws SemanticException {
+ private AggregateCall convertGBAgg(AggInfo agg, List<RexNode> gbChildProjLst, RexNodeConverter converter,
+ HashMap<String, Integer> rexNodeToPosMap, Integer childProjLstIndx) throws SemanticException {
// 1. Get agg fn ret type in Calcite
RelDataType aggFnRetType = TypeConverter.convert(agg.m_returnType,
@@ -3551,7 +3518,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
List<AggregateCall> aggregateCalls = Lists.newArrayList();
for (AggInfo agg : aggInfoLst) {
- aggregateCalls.add(convertGBAgg(agg, srcRel, gbChildProjLst, converter, rexNodeToPosMap,
+ aggregateCalls.add(convertGBAgg(agg, gbChildProjLst, converter, rexNodeToPosMap,
gbChildProjLst.size()));
}
if (hasGroupSets) {
@@ -3596,12 +3563,12 @@ public class CalcitePlanner extends SemanticAnalyzer {
RowResolver gByInputRR, RowResolver gByRR) {
if (gByExpr.getType() == HiveParser.DOT
&& gByExpr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL) {
- String tab_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(0).getChild(0)
+ String tab_alias = unescapeIdentifier(gByExpr.getChild(0).getChild(0)
.getText().toLowerCase());
- String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(1).getText().toLowerCase());
+ String col_alias = unescapeIdentifier(gByExpr.getChild(1).getText().toLowerCase());
gByRR.put(tab_alias, col_alias, colInfo);
} else if (gByExpr.getType() == HiveParser.TOK_TABLE_OR_COL) {
- String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(0).getText().toLowerCase());
+ String col_alias = unescapeIdentifier(gByExpr.getChild(0).getText().toLowerCase());
String tab_alias = null;
/*
* If the input to the GBy has a tab alias for the column, then add an
@@ -3655,7 +3622,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
TypeInfo udafRetType = null;
// 3.1 Obtain UDAF name
- String aggName = SemanticAnalyzer.unescapeIdentifier(aggAst.getChild(0).getText());
+ String aggName = unescapeIdentifier(aggAst.getChild(0).getText());
// 3.2 Rank functions type is 'int'/'double'
if (FunctionRegistry.isRankingFunction(aggName)) {
@@ -3832,7 +3799,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
for (ASTNode value : aggregationTrees.values()) {
// 6.1 Determine type of UDAF
// This is the GenericUDAF name
- String aggName = SemanticAnalyzer.unescapeIdentifier(value.getChild(0).getText());
+ String aggName = unescapeIdentifier(value.getChild(0).getText());
boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
@@ -3874,8 +3841,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
// 8. We create the group_by operator
gbRel = genGBRelNode(gbExprNDescLst, aggregations, groupingSets, srcRel);
- relToHiveColNameCalcitePosMap.put(gbRel,
- buildHiveToCalciteColumnMap(groupByOutputRowResolver, gbRel));
+ relToHiveColNameCalcitePosMap.put(gbRel, buildHiveToCalciteColumnMap(groupByOutputRowResolver));
this.relToHiveRR.put(gbRel, groupByOutputRowResolver);
}
@@ -4082,8 +4048,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
// rowtype of sortrel is the type of it child; if child happens to be
// synthetic project that we introduced then that projectrel would
// contain the vc.
- ImmutableMap<String, Integer> hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(
- outputRR, sortRel);
+ ImmutableMap<String, Integer> hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(outputRR);
relToHiveRR.put(sortRel, outputRR);
relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap);
@@ -4125,8 +4090,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
RowResolver inputRR = relToHiveRR.get(srcRel);
RowResolver outputRR = inputRR.duplicate();
- ImmutableMap<String, Integer> hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(
- outputRR, sortRel);
+ ImmutableMap<String, Integer> hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(outputRR);
relToHiveRR.put(sortRel, outputRR);
relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap);
}
@@ -4179,7 +4143,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
return oKeys;
}
- private RexWindowBound getBound(BoundarySpec bs, RexNodeConverter converter) {
+ private RexWindowBound getBound(BoundarySpec bs) {
RexWindowBound rwb = null;
if (bs != null) {
@@ -4233,8 +4197,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
return wi;
}
- private Pair<RexNode, TypeInfo> genWindowingProj(QB qb, WindowExpressionSpec wExpSpec,
- RelNode srcRel) throws SemanticException {
+ private Pair<RexNode, TypeInfo> genWindowingProj(WindowExpressionSpec wExpSpec, RelNode srcRel)
+ throws SemanticException {
RexNode w = null;
TypeInfo wHiveRetType = null;
@@ -4275,8 +4239,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
WindowSpec wndSpec = ((WindowFunctionSpec) wExpSpec).getWindowSpec();
List<RexNode> partitionKeys = getPartitionKeys(wndSpec.getPartition(), converter, inputRR);
List<RexFieldCollation> orderKeys = getOrderKeys(wndSpec.getOrder(), converter, inputRR);
- RexWindowBound upperBound = getBound(wndSpec.getWindowFrame().getStart(), converter);
- RexWindowBound lowerBound = getBound(wndSpec.getWindowFrame().getEnd(), converter);
+ RexWindowBound upperBound = getBound(wndSpec.getWindowFrame().getStart());
+ RexWindowBound lowerBound = getBound(wndSpec.getWindowFrame().getEnd());
boolean isRows = wndSpec.getWindowFrame().getWindowType() == WindowType.ROWS;
w = cluster.getRexBuilder().makeOver(calciteAggFnRetType, calciteAggFn, calciteAggFnArgs,
@@ -4328,7 +4292,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
!cubeRollupGrpSetPresent));
}
if (out_rwsch.getExpression(wExprSpec.getExpression()) == null) {
- Pair<RexNode, TypeInfo> wtp = genWindowingProj(qb, wExprSpec, srcRel);
+ Pair<RexNode, TypeInfo> wtp = genWindowingProj(wExprSpec, srcRel);
projsForWindowSelOp.add(wtp.getKey());
// 6.2.2 Update Output Row Schema
@@ -4401,8 +4365,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
HiveRelNode selRel = HiveProject.create(srcRel, calciteColLst, columnNames);
// 4. Keep track of colname-to-posmap && RR for new select
- this.relToHiveColNameCalcitePosMap
- .put(selRel, buildHiveToCalciteColumnMap(out_rwsch, selRel));
+ this.relToHiveColNameCalcitePosMap.put(selRel, buildHiveToCalciteColumnMap(out_rwsch));
this.relToHiveRR.put(selRel, out_rwsch);
return selRel;
@@ -4426,26 +4389,31 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
}
+ private Pair<RelNode, RowResolver> genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel,
+ ImmutableMap<String, Integer> outerNameToPosMap, RowResolver outerRR, boolean isAllColRefRewrite)
+ throws SemanticException {
+ Pair<RelNode, RowResolver> retNodeRR = internalGenSelectLogicalPlan(qb, srcRel, starSrcRel, outerNameToPosMap,
+ outerRR, isAllColRefRewrite);
+
+ QBParseInfo qbp = getQBParseInfo(qb);
+ String selClauseName = qbp.getClauseNames().iterator().next();
+ ASTNode selExprList = qbp.getSelForClause(selClauseName);
+ if (isSelectDistinct(selExprList) && hasGroupBySibling(selExprList)) {
+ retNodeRR = genGBSelectDistinctPlan(retNodeRR);
+ }
+
+ return retNodeRR;
+ }
+
/**
- * NOTE: there can only be one select caluse since we don't handle multi
- * destination insert.
- *
- * @throws SemanticException
- */
- /**
- * @param qb
- * @param srcRel
- * @param starSrcRel
- * @param outerNameToPosMap
- * @param outerRR
+ * NOTE: there can only be one select clause since we don't handle multi destination insert.
* @param isAllColRefRewrite
* when it is true, it means that it is called from group by *, where we use
* genSelectLogicalPlan to rewrite *
* @return RelNode: the select relnode RowResolver: i.e., originalRR, the RR after select when there is an order by.
- * @throws SemanticException
*/
- private Pair<RelNode,RowResolver> genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel,
- ImmutableMap<String, Integer> outerNameToPosMap, RowResolver outerRR, boolean isAllColRefRewrite)
+ private Pair<RelNode, RowResolver> internalGenSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel,
+ ImmutableMap<String, Integer> outerNameToPosMap, RowResolver outerRR, boolean isAllColRefRewrite)
throws SemanticException {
// 0. Generate a Select Node for Windowing
// Exclude the newly-generated select columns from */etc. resolution.
@@ -4619,36 +4587,35 @@ public class CalcitePlanner extends SemanticAnalyzer {
// 6.4 Build ExprNode corresponding to colums
if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
- pos = genColListRegex(".*", expr.getChildCount() == 0 ? null : SemanticAnalyzer
- .getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), expr, col_list,
+ pos = genColListRegex(".*", expr.getChildCount() == 0 ? null :
+ getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), expr, col_list,
excludedColumns, inputRR, starRR, pos, out_rwsch, qb.getAliases(), true);
selectStar = true;
} else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL
&& !hasAsClause
&& !inputRR.getIsExprResolver()
&& SemanticAnalyzer.isRegex(
- SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()), conf)) {
+ unescapeIdentifier(expr.getChild(0).getText()), conf)) {
// In case the expression is a regex COL.
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
- pos = genColListRegex(SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()),
- null, expr, col_list, excludedColumns, inputRR, starRR, pos, out_rwsch,
- qb.getAliases(), true);
+ pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()), null, expr, col_list, excludedColumns,
+ inputRR, starRR, pos, out_rwsch, qb.getAliases(), true);
} else if (expr.getType() == HiveParser.DOT
&& expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
- && inputRR.hasTableAlias(SemanticAnalyzer.unescapeIdentifier(expr.getChild(0)
+ && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0)
.getChild(0).getText().toLowerCase()))
&& !hasAsClause
&& !inputRR.getIsExprResolver()
&& SemanticAnalyzer.isRegex(
- SemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()), conf)) {
+ unescapeIdentifier(expr.getChild(1).getText()), conf)) {
// In case the expression is TABLE.COL (col can be regex).
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
pos = genColListRegex(
- SemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()),
- SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0).getText()
- .toLowerCase()), expr, col_list, excludedColumns, inputRR, starRR, pos,
+ unescapeIdentifier(expr.getChild(1).getText()),
+ unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()),
+ expr, col_list, excludedColumns, inputRR, starRR, pos,
out_rwsch, qb.getAliases(), true);
} else if (ParseUtils.containsTokenOfType(expr, HiveParser.TOK_FUNCTIONDI)
&& !(srcRel instanceof HiveAggregate)) {
@@ -4714,7 +4681,13 @@ public class CalcitePlanner extends SemanticAnalyzer {
// TODO: support unselected columns in genericUDTF and windowing functions.
// We examine the order by in this query block and adds in column needed
// by order by in select list.
- if (obAST != null && !(selForWindow != null && selExprList.getToken().getType() == HiveParser.TOK_SELECTDI) && !isAllColRefRewrite) {
+ //
+ // If DISTINCT is present, it is not possible to ORDER BY unselected
+ // columns, and in fact adding all columns would change the behavior of
+ // DISTINCT, so we bypass this logic.
+ if (obAST != null
+ && selExprList.getToken().getType() != HiveParser.TOK_SELECTDI
+ && !isAllColRefRewrite) {
// 1. OB Expr sanity test
// in strict mode, in the presence of order by, limit must be
// specified
@@ -4767,8 +4740,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol());
groupByOutputRowResolver.put(colInfo.getTabAlias(), colInfo.getAlias(), newColInfo);
}
- relToHiveColNameCalcitePosMap.put(outputRel,
- buildHiveToCalciteColumnMap(groupByOutputRowResolver, outputRel));
+ relToHiveColNameCalcitePosMap.put(outputRel, buildHiveToCalciteColumnMap(groupByOutputRowResolver));
this.relToHiveRR.put(outputRel, groupByOutputRowResolver);
}
@@ -4883,12 +4855,34 @@ public class CalcitePlanner extends SemanticAnalyzer {
RelNode udtf = HiveTableFunctionScan.create(cluster, traitSet, list, rexNode, null, retType,
null);
// Add new rel & its RR to the maps
- relToHiveColNameCalcitePosMap.put(udtf, this.buildHiveToCalciteColumnMap(out_rwsch, udtf));
+ relToHiveColNameCalcitePosMap.put(udtf, buildHiveToCalciteColumnMap(out_rwsch));
relToHiveRR.put(udtf, out_rwsch);
return udtf;
}
+ private Pair<RelNode, RowResolver> genGBSelectDistinctPlan(Pair<RelNode, RowResolver> srcNodeRR)
+ throws SemanticException {
+ RelNode srcRel = srcNodeRR.left;
+
+ RelDataType inputRT = srcRel.getRowType();
+ List<Integer> groupSetPositions =
+ IntStream.range(0, inputRT.getFieldCount()).boxed().collect(Collectors.toList());
+
+ HiveAggregate distAgg = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), srcRel,
+ ImmutableBitSet.of(groupSetPositions), null, new ArrayList<AggregateCall>());
+
+ // This comes from genSelectLogicalPlan, must be a project assert srcRel instanceof HiveProject;
+ RowResolver outputRR = srcNodeRR.right;
+ if (outputRR == null) {
+ outputRR = relToHiveRR.get(srcRel);
+ }
+
+ relToHiveRR.put(distAgg, outputRR);
+ relToHiveColNameCalcitePosMap.put(distAgg, relToHiveColNameCalcitePosMap.get(srcRel));
+ return new Pair<RelNode, RowResolver>(distAgg, outputRR);
+ }
+
private RelNode genLogicalPlan(QBExpr qbexpr) throws SemanticException {
switch (qbexpr.getOpcode()) {
case NULLOP:
@@ -4923,7 +4917,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
// 0. Check if we can handle the SubQuery;
// canHandleQbForCbo returns null if the query can be handled.
- String reason = canHandleQbForCbo(queryProperties, conf, false, LOG.isDebugEnabled(), qb);
+ String reason = canHandleQbForCbo(queryProperties, conf, false, LOG.isDebugEnabled());
if (reason != null) {
String msg = "CBO can not handle Sub Query";
if (LOG.isDebugEnabled()) {
@@ -4986,7 +4980,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
// 2. Build Rel for where Clause
- filterRel = genFilterLogicalPlan(qb, srcRel, aliasToRel, outerNameToPosMap, outerRR, false);
+ filterRel = genFilterLogicalPlan(qb, srcRel, outerNameToPosMap, outerRR, false);
srcRel = (filterRel == null) ? srcRel : filterRel;
RelNode starSrcRel = srcRel;
@@ -4995,7 +4989,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
srcRel = (gbRel == null) ? srcRel : gbRel;
// 4. Build Rel for GB Having Clause
- gbHavingRel = genGBHavingLogicalPlan(qb, srcRel, aliasToRel);
+ gbHavingRel = genGBHavingLogicalPlan(qb, srcRel);
srcRel = (gbHavingRel == null) ? srcRel : gbHavingRel;
// 5. Build Rel for Select Clause
@@ -5029,7 +5023,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
newRR.putWithCheck(alias, tmp[1], colInfo.getInternalName(), newCi);
}
relToHiveRR.put(srcRel, newRR);
- relToHiveColNameCalcitePosMap.put(srcRel, buildHiveToCalciteColumnMap(newRR, srcRel));
+ relToHiveColNameCalcitePosMap.put(srcRel, buildHiveToCalciteColumnMap(newRR));
}
if (LOG.isDebugEnabled()) {
@@ -5040,8 +5034,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
return srcRel;
}
- private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel, Map<String, RelNode> aliasToRel)
- throws SemanticException {
+ private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
RelNode gbFilter = null;
QBParseInfo qbp = getQBParseInfo(qb);
String destClauseName = qbp.getClauseNames().iterator().next();
@@ -5062,7 +5055,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
targetNode = rewriteGroupingFunctionAST(getGroupByForClause(qbp, destClauseName), targetNode,
!cubeRollupGrpSetPresent);
}
- gbFilter = genFilterRelNode(qb, targetNode, srcRel, aliasToRel, null, null, true);
+ gbFilter = genFilterRelNode(qb, targetNode, srcRel, null, null, true);
}
return gbFilter;
@@ -5126,7 +5119,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
- private ImmutableMap<String, Integer> buildHiveToCalciteColumnMap(RowResolver rr, RelNode rNode) {
+ private ImmutableMap<String, Integer> buildHiveToCalciteColumnMap(RowResolver rr) {
ImmutableMap.Builder<String, Integer> b = new ImmutableMap.Builder<String, Integer>();
for (ColumnInfo ci : rr.getRowSchema().getSignature()) {
b.put(ci.getInternalName(), rr.getPosition(ci.getInternalName()));
@@ -5153,16 +5146,6 @@ public class CalcitePlanner extends SemanticAnalyzer {
private QBParseInfo getQBParseInfo(QB qb) throws CalciteSemanticException {
return qb.getParseInfo();
}
-
- private List<String> getTabAliases(RowResolver inputRR) {
- List<String> tabAliases = new ArrayList<String>(); // TODO: this should be
- // unique
- for (ColumnInfo ci : inputRR.getColumnInfos()) {
- tabAliases.add(ci.getTabAlias());
- }
-
- return tabAliases;
- }
}
/**
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 65648d9..6252013 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -26,6 +26,7 @@ import java.security.AccessControlException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collection;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
@@ -1729,10 +1730,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
if (qbp.getJoinExpr() != null) {
queryProperties.setHasJoinFollowedByGroupBy(true);
}
- if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
- throw new SemanticException(generateErrorMessage(ast,
- ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg()));
- }
qbp.setGroupByExprForClause(ctx_1.dest, ast);
skipRecursion = true;
@@ -4196,30 +4193,32 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
}
/**
- * This function is a wrapper of parseInfo.getGroupByForClause which
- * automatically translates SELECT DISTINCT a,b,c to SELECT a,b,c GROUP BY
- * a,b,c.
+ * Returns the GBY, if present;
+ * DISTINCT, if present, will be handled when generating the SELECT.
*/
List<ASTNode> getGroupByForClause(QBParseInfo parseInfo, String dest) throws SemanticException {
- if (parseInfo.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
- ASTNode selectExprs = parseInfo.getSelForClause(dest);
- List<ASTNode> result = new ArrayList<ASTNode>(selectExprs == null ? 0
- : selectExprs.getChildCount());
- if (selectExprs != null) {
- for (int i = 0; i < selectExprs.getChildCount(); ++i) {
- if (((ASTNode) selectExprs.getChild(i)).getToken().getType() == HiveParser.QUERY_HINT) {
- continue;
- }
- // table.column AS alias
- ASTNode grpbyExpr = (ASTNode) selectExprs.getChild(i).getChild(0);
- result.add(grpbyExpr);
+ ASTNode selectExpr = parseInfo.getSelForClause(dest);
+ Collection<ASTNode> aggregateFunction = parseInfo.getDestToAggregationExprs().get(dest).values();
+ if (!(this instanceof CalcitePlanner) && isSelectDistinct(selectExpr) && hasGroupBySibling(selectExpr)) {
+ throw new SemanticException("SELECT DISTINCT with GROUP BY is only supported with CBO");
+ }
+
+ if (isSelectDistinct(selectExpr) && !hasGroupBySibling(selectExpr) &&
+ !isAggregateInSelect(selectExpr, aggregateFunction)) {
+ List<ASTNode> result = new ArrayList<ASTNode>(selectExpr.getChildCount());
+ for (int i = 0; i < selectExpr.getChildCount(); ++i) {
+ if (((ASTNode) selectExpr.getChild(i)).getToken().getType() == HiveParser.QUERY_HINT) {
+ continue;
}
+ // table.column AS alias
+ ASTNode grpbyExpr = (ASTNode) selectExpr.getChild(i).getChild(0);
+ result.add(grpbyExpr);
}
return result;
} else {
+ // look for a true GBY
ASTNode grpByExprs = parseInfo.getGroupByForClause(dest);
- List<ASTNode> result = new ArrayList<ASTNode>(grpByExprs == null ? 0
- : grpByExprs.getChildCount());
+ List<ASTNode> result = new ArrayList<ASTNode>(grpByExprs == null ? 0 : grpByExprs.getChildCount());
if (grpByExprs != null) {
for (int i = 0; i < grpByExprs.getChildCount(); ++i) {
ASTNode grpbyExpr = (ASTNode) grpByExprs.getChild(i);
@@ -4232,6 +4231,35 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
}
}
+ /**
+  * Tells whether the parent of the given SELECT node also has a TOK_GROUPBY child,
+  * i.e. whether a GROUP BY clause sits next to the SELECT in the AST.
+  *
+  * @param selectExpr the SELECT (or SELECT DISTINCT) node whose siblings are inspected
+  * @return true if any child of the parent is an ASTNode of type TOK_GROUPBY;
+  *         false otherwise, including when there is no parent or the parent is not a Node
+  */
+ protected boolean hasGroupBySibling(ASTNode selectExpr) {
+   // instanceof evaluates to false for null, so no separate null check on the parent is needed
+   if (!(selectExpr.getParent() instanceof Node)) {
+     return false;
+   }
+
+   for (Node sibling : ((Node) selectExpr.getParent()).getChildren()) {
+     // stop at the first GROUP BY; the remaining siblings cannot change the answer
+     if (sibling instanceof ASTNode && ((ASTNode) sibling).getType() == HiveParser.TOK_GROUPBY) {
+       return true;
+     }
+   }
+
+   return false;
+ }
+
+ /**
+  * Tells whether the given node is a SELECT DISTINCT, i.e. whether its token type is TOK_SELECTDI.
+  *
+  * @param expr the node to test
+  * @return true if the node's type equals HiveParser.TOK_SELECTDI
+  */
+ protected boolean isSelectDistinct(ASTNode expr) {
+   final int tokenType = expr.getType();
+   return HiveParser.TOK_SELECTDI == tokenType;
+ }
+
+ /**
+  * Recursively checks whether any descendant of the given node is one of the supplied
+  * aggregate expressions. The node itself is not tested, only the nodes below it.
+  *
+  * @param node the root of the subtree to search
+  * @param aggregateFunction the aggregate expression nodes to look for; membership is
+  *        decided by {@link Collection#contains(Object)}
+  * @return true if some descendant is contained in aggregateFunction; false otherwise,
+  *         including when the node has no children
+  */
+ protected boolean isAggregateInSelect(Node node, Collection<ASTNode> aggregateFunction) {
+   if (node.getChildren() != null) {
+     for (Node descendant : node.getChildren()) {
+       // direct hit on this child
+       if (aggregateFunction.contains(descendant)) {
+         return true;
+       }
+       // otherwise keep looking deeper in the child's own subtree
+       if (isAggregateInSelect(descendant, aggregateFunction)) {
+         return true;
+       }
+     }
+   }
+
+   return false;
+ }
+
static String[] getColAlias(ASTNode selExpr, String defaultName,
RowResolver inputRR, boolean includeFuncName, int colNum) {
String colAlias = null;
diff --git a/ql/src/test/queries/clientnegative/selectDistinctStarNeg_2.q b/ql/src/test/queries/clientnegative/selectDistinctStarNeg_2.q
deleted file mode 100644
index cf0ac4b..0000000
--- a/ql/src/test/queries/clientnegative/selectDistinctStarNeg_2.q
+++ /dev/null
@@ -1,4 +0,0 @@
---! qt:dataset:src
--- SELECT DISTINCT and GROUP BY can not be in the same query. Error encountered near token ‘key’
-
-select distinct * from src group by key;
\ No newline at end of file
diff --git a/ql/src/test/queries/clientnegative/wrong_distinct_group_by_without_cbo.q b/ql/src/test/queries/clientnegative/wrong_distinct_group_by_without_cbo.q
new file mode 100644
index 0000000..bb614fe
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/wrong_distinct_group_by_without_cbo.q
@@ -0,0 +1,5 @@
+--! qt:dataset:src
+
+set hive.cbo.enable=false;
+
+select distinct key from src group by key
diff --git a/ql/src/test/queries/clientpositive/distinct_groupby.q b/ql/src/test/queries/clientpositive/distinct_groupby.q
new file mode 100644
index 0000000..abfef07
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/distinct_groupby.q
@@ -0,0 +1,76 @@
+--! qt:dataset:src
+--! qt:dataset:src1
+
+explain select distinct key from src1 group by key,value;
+select distinct key from src1 group by key,value;
+
+explain select distinct count(value) from src group by key;
+select distinct count(value) from src group by key;
+
+explain select distinct count(*) from src1 where key in (128,146,150);
+select distinct count(*) from src1 where key in (128,146,150);
+
+explain select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T;
+select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T;
+
+explain select distinct count(*)+1 from src1;
+select distinct count(*)+1 from src1;
+
+explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key;
+select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key;
+
+explain select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a;
+select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a;
+
+explain select distinct key from src1;
+select distinct key from src1;
+
+explain select distinct * from src1;
+select distinct * from src1;
+
+explain select distinct count(*) from src1 where key in (128,146,150) group by key;
+select distinct count(*) from src1 where key in (128,146,150) group by key;
+
+explain select distinct key, count(*) from src1 where key in (128,146,150) group by key;
+select distinct key, count(*) from src1 where key in (128,146,150) group by key;
+
+explain select distinct * from (select * from src1) as T;
+select distinct * from (select * from src1) as T;
+
+explain select distinct * from (select count(*) from src1) as T;
+select distinct * from (select count(*) from src1) as T;
+
+explain select distinct * from (select * from src1 where key in (128,146,150)) as T;
+select distinct * from (select * from src1 where key in (128,146,150)) as T;
+
+explain select distinct key from (select * from src1 where key in (128,146,150)) as T;
+select distinct key from (select * from src1 where key in (128,146,150)) as T;
+
+explain select distinct * from (select count(*) from src1 where key in (128,146,150)) as T;
+select distinct * from (select count(*) from src1 where key in (128,146,150)) as T;
+
+explain select distinct sum(key) over () from src1;
+select distinct sum(key) over () from src1;
+
+explain select distinct * from (select sum(key) over () from src1) as T;
+select distinct * from (select sum(key) over () from src1) as T;
+
+explain select distinct value, key, count(1) over (partition by value) from src1;
+select distinct value, key, count(1) over (partition by value) from src1;
+
+explain select value, key, count(1) over (partition by value) from src1 group by value, key;
+select value, key, count(1) over (partition by value) from src1 group by value, key;
+
+explain select value, key, count(1) over (partition by value) from src1;
+select value, key, count(1) over (partition by value) from src1;
+
+explain select distinct count(*)+key from src1 group by key;
+select distinct count(*)+key from src1 group by key;
+
+explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key;
+select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key;
+
+-- should not project the virtual BLOCK_OFFSET et al. columns
+explain select distinct * from (select distinct * from src1) as T;
+select distinct * from (select distinct * from src1) as T;
+
diff --git a/ql/src/test/queries/negative/wrong_distinct1.q b/ql/src/test/queries/negative/wrong_distinct1.q
deleted file mode 100755
index 1e966ad..0000000
--- a/ql/src/test/queries/negative/wrong_distinct1.q
+++ /dev/null
@@ -1,3 +0,0 @@
---! qt:dataset:src
-FROM src
-INSERT OVERWRITE TABLE dest1 SELECT DISTINCT src.key, substr(src.value,4,1) GROUP BY src.key
diff --git a/ql/src/test/results/clientnegative/distinct_missing_groupby.q.out b/ql/src/test/results/clientnegative/distinct_missing_groupby.q.out
new file mode 100644
index 0000000..ec36976
--- /dev/null
+++ b/ql/src/test/results/clientnegative/distinct_missing_groupby.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10025]: Line 2:16 Expression not in GROUP BY key 'key'
diff --git a/ql/src/test/results/clientnegative/selectDistinctStarNeg_2.q.out b/ql/src/test/results/clientnegative/selectDistinctStarNeg_2.q.out
deleted file mode 100644
index bafa21f..0000000
--- a/ql/src/test/results/clientnegative/selectDistinctStarNeg_2.q.out
+++ /dev/null
@@ -1 +0,0 @@
-FAILED: SemanticException 4:36 SELECT DISTINCT and GROUP BY can not be in the same query. Error encountered near token 'key'
diff --git a/ql/src/test/results/clientnegative/udaf_invalid_place.q.out b/ql/src/test/results/clientnegative/udaf_invalid_place.q.out
deleted file mode 100644
index 50880e5..0000000
--- a/ql/src/test/results/clientnegative/udaf_invalid_place.q.out
+++ /dev/null
@@ -1 +0,0 @@
-FAILED: SemanticException [Error 10128]: Line 2:21 Not yet supported place for UDAF 'sum'
diff --git a/ql/src/test/results/clientnegative/wrong_distinct_group_by_without_cbo.q.out b/ql/src/test/results/clientnegative/wrong_distinct_group_by_without_cbo.q.out
new file mode 100644
index 0000000..2c97cd8
--- /dev/null
+++ b/ql/src/test/results/clientnegative/wrong_distinct_group_by_without_cbo.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException SELECT DISTINCT with GROUP BY is only supported with CBO
diff --git a/ql/src/test/results/clientpositive/distinct_groupby.q.out b/ql/src/test/results/clientpositive/distinct_groupby.q.out
new file mode 100644
index 0000000..77e354d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/distinct_groupby.q.out
@@ -0,0 +1,2185 @@
+PREHOOK: query: explain select distinct key from src1 group by key,value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct key from src1 group by key,value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct key from src1 group by key,value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct key from src1 group by key,value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+
+128
+146
+150
+213
+224
+238
+255
+273
+278
+311
+369
+401
+406
+66
+98
+PREHOOK: query: explain select distinct count(value) from src group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct count(value) from src group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(value)
+ keys: key (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col1 (type: bigint)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 125 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 125 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 125 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 125 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct count(value) from src group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct count(value) from src group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+1
+2
+3
+4
+5
+PREHOOK: query: explain select distinct count(*) from src1 where key in (128,146,150)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct count(*) from src1 where key in (128,146,150)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct count(*) from src1 where key in (128,146,150)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct count(*) from src1 where key in (128,146,150)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+3
+PREHOOK: query: explain select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+3
+PREHOOK: query: explain select distinct count(*)+1 from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct count(*)+1 from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: (_col0 + 1L) (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct count(*)+1 from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct count(*)+1 from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+26
+PREHOOK: query: explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: b
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col3
+ Statistics: Num rows: 39 Data size: 7020 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(_col1), count(_col3)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), count(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+37 37
+PREHOOK: query: explain select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col1
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col1 (type: bigint)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+1
+PREHOOK: query: explain select distinct key from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct key from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct key from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct key from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+
+128
+146
+150
+213
+224
+238
+255
+273
+278
+311
+369
+401
+406
+66
+98
+PREHOOK: query: explain select distinct * from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct * from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct * from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+
+ val_165
+ val_193
+ val_265
+ val_27
+ val_409
+ val_484
+128
+146 val_146
+150 val_150
+213 val_213
+224
+238 val_238
+255 val_255
+273 val_273
+278 val_278
+311 val_311
+369
+401 val_401
+406 val_406
+66 val_66
+98 val_98
+PREHOOK: query: explain select distinct count(*) from src1 where key in (128,146,150) group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct count(*) from src1 where key in (128,146,150) group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col1
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col1 (type: bigint)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct count(*) from src1 where key in (128,146,150) group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct count(*) from src1 where key in (128,146,150) group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+1
+PREHOOK: query: explain select distinct key, count(*) from src1 where key in (128,146,150) group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct key, count(*) from src1 where key in (128,146,150) group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct key, count(*) from src1 where key in (128,146,150) group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct key, count(*) from src1 where key in (128,146,150) group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+128 1
+146 1
+150 1
+PREHOOK: query: explain select distinct * from (select * from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct * from (select * from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct * from (select * from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from (select * from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+
+ val_165
+ val_193
+ val_265
+ val_27
+ val_409
+ val_484
+128
+146 val_146
+150 val_150
+213 val_213
+224
+238 val_238
+255 val_255
+273 val_273
+278 val_278
+311 val_311
+369
+401 val_401
+406 val_406
+66 val_66
+98 val_98
+PREHOOK: query: explain select distinct * from (select count(*) from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct * from (select count(*) from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct * from (select count(*) from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from (select count(*) from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+25
+PREHOOK: query: explain select distinct * from (select * from src1 where key in (128,146,150)) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct * from (select * from src1 where key in (128,146,150)) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 6 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct * from (select * from src1 where key in (128,146,150)) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from (select * from src1 where key in (128,146,150)) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+128
+146 val_146
+150 val_150
+PREHOOK: query: explain select distinct key from (select * from src1 where key in (128,146,150)) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct key from (select * from src1 where key in (128,146,150)) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct key from (select * from src1 where key in (128,146,150)) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct key from (select * from src1 where key in (128,146,150)) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+128
+146
+150
+PREHOOK: query: explain select distinct * from (select count(*) from src1 where key in (128,146,150)) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct * from (select count(*) from src1 where key in (128,146,150)) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct * from (select count(*) from src1 where key in (128,146,150)) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from (select count(*) from src1 where key in (128,146,150)) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+3
+PREHOOK: query: explain select distinct sum(key) over () from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct sum(key) over () from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: 0 (type: int)
+ sort order: +
+ Map-reduce partition columns: 0 (type: int)
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: key (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: 0 ASC NULLS FIRST
+ partition by: 0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_0
+ arguments: _col0
+ name: sum
+ window function: GenericUDAFSumDouble
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: sum_window_0 (type: double)
+ outputColumnNames: _col0
+ Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: double)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: double)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct sum(key) over () from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct sum(key) over () from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+3556.0
+PREHOOK: query: explain select distinct * from (select sum(key) over () from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct * from (select sum(key) over () from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: 0 (type: int)
+ sort order: +
+ Map-reduce partition columns: 0 (type: int)
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: key (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: 0 ASC NULLS FIRST
+ partition by: 0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_0
+ arguments: _col0
+ name: sum
+ window function: GenericUDAFSumDouble
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: sum_window_0 (type: double)
+ outputColumnNames: _col0
+ Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: double)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: double)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct * from (select sum(key) over () from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from (select sum(key) over () from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+3556.0
+PREHOOK: query: explain select distinct value, key, count(1) over (partition by value) from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct value, key, count(1) over (partition by value) from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: value (type: string)
+ sort order: +
+ Map-reduce partition columns: value (type: string)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: key (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: count_window_0
+ arguments: 1
+ name: count
+ window function: GenericUDAFCountEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string), count_window_0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct value, key, count(1) over (partition by value) from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct value, key, count(1) over (partition by value) from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+ 7
+ 128 7
+ 224 7
+ 369 7
+val_146 146 1
+val_150 150 1
+val_165 1
+val_193 1
+val_213 213 1
+val_238 238 1
+val_255 255 1
+val_265 1
+val_27 1
+val_273 273 1
+val_278 278 1
+val_311 311 1
+val_401 401 1
+val_406 406 1
+val_409 1
+val_484 1
+val_66 66 1
+val_98 98 1
+PREHOOK: query: explain select value, key, count(1) over (partition by value) from src1 group by value, key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select value, key, count(1) over (partition by value) from src1 group by value, key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: count_window_0
+ arguments: 1
+ name: count
+ window function: GenericUDAFCountEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string), count_window_0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select value, key, count(1) over (partition by value) from src1 group by value, key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select value, key, count(1) over (partition by value) from src1 group by value, key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+ 224 4
+ 128 4
+ 369 4
+ 4
+val_146 146 1
+val_150 150 1
+val_165 1
+val_193 1
+val_213 213 1
+val_238 238 1
+val_255 255 1
+val_265 1
+val_27 1
+val_273 273 1
+val_278 278 1
+val_311 311 1
+val_401 401 1
+val_406 406 1
+val_409 1
+val_484 1
+val_66 66 1
+val_98 98 1
+PREHOOK: query: explain select value, key, count(1) over (partition by value) from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select value, key, count(1) over (partition by value) from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: value (type: string)
+ sort order: +
+ Map-reduce partition columns: value (type: string)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: key (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: count_window_0
+ arguments: 1
+ name: count
+ window function: GenericUDAFCountEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string), count_window_0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select value, key, count(1) over (partition by value) from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select value, key, count(1) over (partition by value) from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+ 7
+ 7
+ 7
+ 7
+ 128 7
+ 369 7
+ 224 7
+val_146 146 1
+val_150 150 1
+val_165 1
+val_193 1
+val_213 213 1
+val_238 238 1
+val_255 255 1
+val_265 1
+val_27 1
+val_273 273 1
+val_278 278 1
+val_311 311 1
+val_401 401 1
+val_406 406 1
+val_409 1
+val_484 1
+val_66 66 1
+val_98 98 1
+PREHOOK: query: explain select distinct count(*)+key from src1 group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct count(*)+key from src1 group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: (UDFToDouble(_col1) + UDFToDouble(_col0)) (type: double)
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: double)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: double)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct count(*)+key from src1 group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct count(*)+key from src1 group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+NULL
+67.0
+99.0
+129.0
+147.0
+151.0
+214.0
+225.0
+239.0
+256.0
+274.0
+279.0
+312.0
+370.0
+402.0
+407.0
+PREHOOK: query: explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: b
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 39 Data size: 10413 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(_col1), count(_col3)
+ keys: _col0 (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 206 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2 Data size: 206 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), count(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 206 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: bigint), _col2 (type: bigint)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 2 Data size: 206 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col1 (type: bigint), _col2 (type: bigint)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint), _col1 (type: bigint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: bigint), _col1 (type: bigint)
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: bigint), KEY._col1 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+1 1
+2 2
+3 3
+4 4
+5 5
+PREHOOK: query: explain select distinct * from (select distinct * from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct * from (select distinct * from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct * from (select distinct * from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from (select distinct * from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+
+ val_165
+ val_193
+ val_265
+ val_27
+ val_409
+ val_484
+128
+146 val_146
+150 val_150
+213 val_213
+224
+238 val_238
+255 val_255
+273 val_273
+278 val_278
+311 val_311
+369
+401 val_401
+406 val_406
+66 val_66
+98 val_98
diff --git a/ql/src/test/results/compiler/errors/wrong_distinct1.q.out b/ql/src/test/results/compiler/errors/wrong_distinct1.q.out
deleted file mode 100644
index de81b5b..0000000
--- a/ql/src/test/results/compiler/errors/wrong_distinct1.q.out
+++ /dev/null
@@ -1,2 +0,0 @@
-Semantic Exception:
-3:88 SELECT DISTINCT and GROUP BY can not be in the same query. Error encountered near token 'key'
\ No newline at end of file