You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2014/08/25 19:16:07 UTC
svn commit: r1620377 - in /hive/branches/cbo/ql/src:
java/org/apache/hadoop/hive/ql/optimizer/optiq/
java/org/apache/hadoop/hive/ql/parse/ test/queries/clientpositive/
test/results/clientpositive/
Author: hashutosh
Date: Mon Aug 25 17:16:07 2014
New Revision: 1620377
URL: http://svn.apache.org/r1620377
Log:
HIVE-7724 : CBO: support Subquery predicates (Harish Butani via Ashutosh Chauhan)
Modified:
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
hive/branches/cbo/ql/src/test/queries/clientpositive/cbo_correctness.q
hive/branches/cbo/ql/src/test/results/clientpositive/cbo_correctness.q.out
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java?rev=1620377&r1=1620376&r2=1620377&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java Mon Aug 25 17:16:07 2014
@@ -131,8 +131,8 @@ public class HiveOptiqUtil {
for (i = 0; i < origColEqConds.size(); i++) {
Pair<Integer, Integer> p = origColEqConds.get(i);
- RexNode leftKey = leftJoinKeys.get(p.left);
- RexNode rightKey = rightJoinKeys.get(p.right);
+ RexNode leftKey = leftJoinKeys.get(i);
+ RexNode rightKey = rightJoinKeys.get(i);
leftKeys.add(p.left);
rightKeys.add(p.right);
RexNode cond = rexBuilder.makeCall(
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java?rev=1620377&r1=1620376&r2=1620377&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java Mon Aug 25 17:16:07 2014
@@ -355,12 +355,16 @@ public class RowResolver implements Seri
// TODO: 1) How to handle collisions? 2) Should we be cloning ColumnInfo or
// not?
public static int add(RowResolver rrToAddTo, RowResolver rrToAddFrom,
- int outputColPos) throws SemanticException {
+ int outputColPos, int numColumns) throws SemanticException {
String tabAlias;
String colAlias;
String[] qualifiedColName;
+ int i = 0;
for (ColumnInfo cInfoFrmInput : rrToAddFrom.getRowSchema().getSignature()) {
+ if ( numColumns >= 0 && i == numColumns ) {
+ break;
+ }
ColumnInfo newCI = null;
qualifiedColName = rrToAddFrom.getInvRslvMap().get(
cInfoFrmInput.getInternalName());
@@ -377,11 +381,25 @@ public class RowResolver implements Seri
throw new RuntimeException("Ambigous Column Names");
rrToAddTo.put(tabAlias, colAlias, newCI);
+
+ qualifiedColName = rrToAddFrom.getAlternateMappings(cInfoFrmInput
+ .getInternalName());
+ if (qualifiedColName != null) {
+ tabAlias = qualifiedColName[0];
+ colAlias = qualifiedColName[1];
+ rrToAddTo.put(tabAlias, colAlias, newCI);
+ }
+ i++;
}
return outputColPos;
}
+ public static int add(RowResolver rrToAddTo, RowResolver rrToAddFrom,
+ int outputColPos) throws SemanticException {
+ return add(rrToAddTo, rrToAddFrom, outputColPos, -1);
+ }
+
/**
* Return a new row resolver that is combination of left RR and right RR.
* The schema will be schema of left, schema of right
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1620377&r1=1620376&r2=1620377&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Mon Aug 25 17:16:07 2014
@@ -12374,14 +12374,197 @@ public class SemanticAnalyzer extends Ba
return filterRel;
}
- private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel)
+ private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel,
+ Map<String, RelNode> aliasToRel, boolean forHavingClause)
+ throws SemanticException {
+ /*
+ * Handle Subquery predicates.
+ *
+ * Notes (8/22/14 hb):
+ * Why is this a copy of the code from {@link #genFilterPlan}
+ * - for now we will support the same behavior as the non-CBO route.
+ * - but plan to allow nested SubQueries(Restriction.9.m) and
+ * multiple SubQuery expressions(Restriction.8.m). This
+ * requires us to utilize Optiq's Decorrelation mechanics,
+ * and for Optiq to fix/flush out Null semantics(OPTIQ-373)
+ * - besides only the driving code has been copied. Most of
+ * the code which is SubQueryUtils and QBSubQuery is reused.
+ *
+ */
+ int numSrcColumns = srcRel.getRowType().getFieldCount();
+ List<ASTNode> subQueriesInOriginalTree = SubQueryUtils
+ .findSubQueries(searchCond);
+ if (subQueriesInOriginalTree.size() > 0) {
+
+ /*
+ * Restriction.9.m :: disallow nested SubQuery expressions.
+ */
+ if (qb.getSubQueryPredicateDef() != null) {
+ throw new SemanticException(
+ ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
+ subQueriesInOriginalTree.get(0),
+ "Nested SubQuery expressions are not supported."));
+ }
+
+ /*
+ * Restriction.8.m :: We allow only 1 SubQuery expression per Query.
+ */
+ if (subQueriesInOriginalTree.size() > 1) {
+
+ throw new SemanticException(
+ ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
+ subQueriesInOriginalTree.get(1),
+ "Only 1 SubQuery expression is supported."));
+ }
+
+ /*
+ * Clone the Search AST; apply all rewrites on the clone.
+ */
+ ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor
+ .dupTree(searchCond);
+ List<ASTNode> subQueries = SubQueryUtils
+ .findSubQueries(clonedSearchCond);
+
+ RowResolver inputRR = m_relToHiveRR.get(srcRel);
+
+ for (int i = 0; i < subQueries.size(); i++) {
+ ASTNode subQueryAST = subQueries.get(i);
+ ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i);
+
+ int sqIdx = qb.incrNumSubQueryPredicates();
+ clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(
+ clonedSearchCond, subQueryAST);
+
+ QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx,
+ subQueryAST, originalSubQueryAST, ctx);
+
+ if (!forHavingClause) {
+ qb.setWhereClauseSubQueryPredicate(subQuery);
+ } else {
+ qb.setHavingClauseSubQueryPredicate(subQuery);
+ }
+ String havingInputAlias = null;
+
+ if (forHavingClause) {
+ havingInputAlias = "gby_sq" + sqIdx;
+ aliasToRel.put(havingInputAlias, srcRel);
+ }
+
+ subQuery.validateAndRewriteAST(inputRR, forHavingClause,
+ havingInputAlias, aliasToRel.keySet());
+
+ QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(),
+ true);
+ qbSQ.setSubQueryDef(subQuery.getSubQuery());
+ Phase1Ctx ctx_1 = initPhase1Ctx();
+ doPhase1(subQuery.getSubQueryAST(), qbSQ, ctx_1);
+ getMetaData(qbSQ);
+ RelNode subQueryRelNode = genLogicalPlan(qbSQ);
+ aliasToRel.put(subQuery.getAlias(), subQueryRelNode);
+ RowResolver sqRR = m_relToHiveRR.get(subQueryRelNode);
+
+ /*
+ * Check.5.h :: For In and Not In the SubQuery must implicitly or
+ * explicitly only contain one select item.
+ */
+ if (subQuery.getOperator().getType() != SubQueryType.EXISTS
+ && subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS
+ && sqRR.getColumnInfos().size()
+ - subQuery.getNumOfCorrelationExprsAddedToSQSelect() > 1) {
+ throw new SemanticException(
+ ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(subQueryAST,
+ "SubQuery can contain only 1 item in Select List."));
+ }
+
+ /*
+ * If this is a Not In SubQuery Predicate then Join in the Null Check
+ * SubQuery. See QBSubQuery.NotInCheck for details on why and how this
+ * is constructed.
+ */
+ if (subQuery.getNotInCheck() != null) {
+ QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck();
+ notInCheck.setSQRR(sqRR);
+ QB qbSQ_nic = new QB(subQuery.getOuterQueryId(),
+ notInCheck.getAlias(), true);
+ qbSQ_nic.setSubQueryDef(notInCheck.getSubQuery());
+ ctx_1 = initPhase1Ctx();
+ doPhase1(notInCheck.getSubQueryAST(), qbSQ_nic, ctx_1);
+ getMetaData(qbSQ_nic);
+ RelNode subQueryNICRelNode = genLogicalPlan(qbSQ_nic);
+ aliasToRel.put(notInCheck.getAlias(), subQueryNICRelNode);
+ srcRel = genJoinRelNode(srcRel, subQueryNICRelNode,
+ // set explicitly to inner until we figure out SemiJoin use
+ // notInCheck.getJoinType(),
+ JoinType.INNER, notInCheck.getJoinConditionAST());
+ inputRR = m_relToHiveRR.get(srcRel);
+ if (forHavingClause) {
+ aliasToRel.put(havingInputAlias, srcRel);
+ }
+ }
+
+ /*
+ * Gen Join between outer Operator and SQ op
+ */
+ subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause,
+ havingInputAlias);
+ srcRel = genJoinRelNode(srcRel, subQueryRelNode,
+ subQuery.getJoinType(), subQuery.getJoinConditionAST());
+ searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond);
+
+ srcRel = genFilterRelNode(searchCond, srcRel);
+
+ /*
+ * For Not Exists and Not In, add a projection on top of the Left
+ * Outer Join.
+ * NOTE(review): the condition below ("getType() != NOT_EXISTS ||
+ * getType() != NOT_IN") is a tautology — it is true for every
+ * subquery type, so the projection is always added. Given this
+ * comment's stated intent, the check was presumably meant to be
+ * "== NOT_EXISTS || == NOT_IN" — confirm and fix in a follow-up.
+ */
+ if (subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS
+ || subQuery.getOperator().getType() != SubQueryType.NOT_IN) {
+ srcRel = projectLeftOuterSide(srcRel, numSrcColumns);
+ }
+ }
+ return srcRel;
+ }
+
+ return genFilterRelNode(searchCond, srcRel);
+ }
+
+ private RelNode projectLeftOuterSide(RelNode srcRel, int numColumns)
+ throws SemanticException {
+ RowResolver iRR = m_relToHiveRR.get(srcRel);
+ RowResolver oRR = new RowResolver();
+ RowResolver.add(oRR, iRR, 0, numColumns);
+
+ List<RexNode> optiqColLst = new ArrayList<RexNode>();
+ List<String> oFieldNames = new ArrayList<String>();
+ RelDataType iType = srcRel.getRowType();
+
+ for (int i = 0; i < iType.getFieldCount(); i++) {
+ RelDataTypeField fType = iType.getFieldList().get(i);
+ String fName = iType.getFieldNames().get(i);
+ optiqColLst.add(m_cluster.getRexBuilder().makeInputRef(fType.getType(),
+ i));
+ oFieldNames.add(fName);
+ }
+
+ HiveRel selRel = HiveProjectRel.create(srcRel, optiqColLst, oFieldNames);
+
+ this.m_relToHiveColNameOptiqPosMap.put(selRel,
+ buildHiveToOptiqColumnMap(oRR, selRel));
+ this.m_relToHiveRR.put(selRel, oRR);
+ return selRel;
+ }
+
+ private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel,
+ Map<String, RelNode> aliasToRel,
+ boolean forHavingClause)
throws SemanticException {
RelNode filterRel = null;
Iterator<ASTNode> whereClauseIterator = getQBParseInfo(qb)
.getDestToWhereExpr().values().iterator();
if (whereClauseIterator.hasNext()) {
- filterRel = genFilterRelNode((ASTNode) whereClauseIterator.next().getChild(0), srcRel);
+ filterRel = genFilterRelNode(qb, (ASTNode) whereClauseIterator.next().getChild(0),
+ srcRel, aliasToRel, forHavingClause);
}
return filterRel;
@@ -13263,7 +13446,7 @@ public class SemanticAnalyzer extends Ba
}
// 2. Build Rel for where Clause
- filterRel = genFilterLogicalPlan(qb, srcRel);
+ filterRel = genFilterLogicalPlan(qb, srcRel, aliasToRel, false);
srcRel = (filterRel == null) ? srcRel : filterRel;
// 3. Build Rel for GB Clause
@@ -13271,7 +13454,7 @@ public class SemanticAnalyzer extends Ba
srcRel = (gbRel == null) ? srcRel : gbRel;
// 4. Build Rel for GB Having Clause
- gbHavingRel = genGBHavingLogicalPlan(qb, srcRel);
+ gbHavingRel = genGBHavingLogicalPlan(qb, srcRel, aliasToRel);
srcRel = (gbHavingRel == null) ? srcRel : gbHavingRel;
// 5. Build Rel for Select Clause
@@ -13316,7 +13499,8 @@ public class SemanticAnalyzer extends Ba
return srcRel;
}
- private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel)
+ private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel,
+ Map<String, RelNode> aliasToRel)
throws SemanticException {
RelNode gbFilter = null;
QBParseInfo qbp = getQBParseInfo(qb);
@@ -13324,7 +13508,7 @@ public class SemanticAnalyzer extends Ba
.iterator().next());
if (havingClause != null)
- gbFilter = genFilterRelNode((ASTNode) havingClause.getChild(0), srcRel);
+ gbFilter = genFilterRelNode(qb, (ASTNode) havingClause.getChild(0), srcRel, aliasToRel, true);
return gbFilter;
}
Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/cbo_correctness.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/cbo_correctness.q?rev=1620377&r1=1620376&r2=1620377&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/cbo_correctness.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/cbo_correctness.q Mon Aug 25 17:16:07 2014
@@ -12,6 +12,45 @@ load data local inpath '../../data/files
load data local inpath '../../data/files/cbo_t2.txt' into table t2 partition (dt='2014');
load data local inpath '../../data/files/cbo_t3.txt' into table t3;
+CREATE TABLE part(
+ p_partkey INT,
+ p_name STRING,
+ p_mfgr STRING,
+ p_brand STRING,
+ p_type STRING,
+ p_size INT,
+ p_container STRING,
+ p_retailprice DOUBLE,
+ p_comment STRING
+);
+
+LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part;
+
+DROP TABLE lineitem;
+CREATE TABLE lineitem (L_ORDERKEY INT,
+ L_PARTKEY INT,
+ L_SUPPKEY INT,
+ L_LINENUMBER INT,
+ L_QUANTITY DOUBLE,
+ L_EXTENDEDPRICE DOUBLE,
+ L_DISCOUNT DOUBLE,
+ L_TAX DOUBLE,
+ L_RETURNFLAG STRING,
+ L_LINESTATUS STRING,
+ l_shipdate STRING,
+ L_COMMITDATE STRING,
+ L_RECEIPTDATE STRING,
+ L_SHIPINSTRUCT STRING,
+ L_SHIPMODE STRING,
+ L_COMMENT STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|';
+
+LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem;
+
+create table src_cbo as select * from src;
+
+
set hive.stats.dbclass=jdbc:derby;
analyze table t1 partition (dt) compute statistics;
analyze table t1 compute statistics for columns key, value, c_int, c_float, c_boolean;
@@ -19,6 +58,12 @@ analyze table t2 partition (dt) compute
analyze table t2 compute statistics for columns key, value, c_int, c_float, c_boolean;
analyze table t3 compute statistics;
analyze table t3 compute statistics for columns key, value, c_int, c_float, c_boolean;
+analyze table src_cbo compute statistics;
+analyze table src_cbo compute statistics for columns;
+analyze table part compute statistics;
+analyze table part compute statistics for columns;
+analyze table lineitem compute statistics;
+analyze table lineitem compute statistics for columns;
set hive.stats.fetch.column.stats=true;
set hive.auto.convert.join=false;
@@ -223,3 +268,185 @@ select key from t1 where c_int = -6 or
-- 15. query referencing only partition columns
select count(t1.dt) from t1 join t2 on t1.dt = t2.dt where t1.dt = '2014' ;
+
+-- 16. SubQueries Not In
+-- non agg, non corr
+select *
+from src_cbo
+where src_cbo.key not in
+ ( select key from src_cbo s1
+ where s1.key > '2'
+ )
+;
+
+-- non agg, corr
+select p_mfgr, b.p_name, p_size
+from part b
+where b.p_name not in
+ (select p_name
+ from (select p_mfgr, p_name, p_size as r from part) a
+ where r < 10 and b.p_mfgr = a.p_mfgr
+ )
+;
+
+-- agg, non corr
+select p_name, p_size
+from
+part where part.p_size not in
+ (select avg(p_size)
+ from (select p_size from part) a
+ where p_size < 10
+ )
+;
+
+-- agg, corr
+select p_mfgr, p_name, p_size
+from part b where b.p_size not in
+ (select min(p_size)
+ from (select p_mfgr, p_size from part) a
+ where p_size < 10 and b.p_mfgr = a.p_mfgr
+ )
+;
+
+-- non agg, non corr, Group By in Parent Query
+select li.l_partkey, count(*)
+from lineitem li
+where li.l_linenumber = 1 and
+ li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR')
+group by li.l_partkey
+;
+
+-- add null check test from sq_notin.q once HIVE-7721 resolved.
+
+-- non agg, corr, having
+select b.p_mfgr, min(p_retailprice)
+from part b
+group by b.p_mfgr
+having b.p_mfgr not in
+ (select p_mfgr
+ from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a
+ where min(p_retailprice) = l and r - l > 600
+ )
+;
+
+-- agg, non corr, having
+select b.p_mfgr, min(p_retailprice)
+from part b
+group by b.p_mfgr
+having b.p_mfgr not in
+ (select p_mfgr
+ from part a
+ group by p_mfgr
+ having max(p_retailprice) - min(p_retailprice) > 600
+ )
+;
+
+-- 17. SubQueries In
+-- non agg, non corr
+select *
+from src_cbo
+where src_cbo.key in (select key from src_cbo s1 where s1.key > '9')
+;
+
+-- agg, corr
+-- add back once rank issue fixed for cbo
+
+-- distinct, corr
+select *
+from src_cbo b
+where b.key in
+ (select distinct a.key
+ from src_cbo a
+ where b.value = a.value and a.key > '9'
+ )
+;
+
+-- non agg, corr, with join in Parent Query
+select p.p_partkey, li.l_suppkey
+from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
+where li.l_linenumber = 1 and
+ li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
+;
+
+-- where and having
+-- Plan is:
+-- Stage 1: b semijoin sq1:src_cbo (subquery in where)
+-- Stage 2: group by Stage 1 o/p
+-- Stage 5: group by on sq2:src_cbo (subquery in having)
+-- Stage 6: Stage 2 o/p semijoin Stage 5
+explain
+select key, value, count(*)
+from src_cbo b
+where b.key in (select key from src_cbo where src_cbo.key > '8')
+group by key, value
+having count(*) in (select count(*) from src_cbo s1 where s1.key > '9' group by s1.key )
+;
+
+-- non agg, non corr, windowing
+explain
+select p_mfgr, p_name, avg(p_size)
+from part
+group by p_mfgr, p_name
+having p_name in
+ (select first_value(p_name) over(partition by p_mfgr order by p_size) from part)
+;
+
+-- 18. SubQueries Not Exists
+-- distinct, corr
+select *
+from src_cbo b
+where not exists
+ (select distinct a.key
+ from src_cbo a
+ where b.value = a.value and a.value > 'val_2'
+ )
+;
+
+-- no agg, corr, having
+select *
+from src_cbo b
+group by key, value
+having not exists
+ (select a.key
+ from src_cbo a
+ where b.value = a.value and a.key = b.key and a.value > 'val_12'
+ )
+;
+
+-- 19. SubQueries Exists
+-- view test
+create view cv1 as
+select *
+from src_cbo b
+where exists
+ (select a.key
+ from src_cbo a
+ where b.value = a.value and a.key = b.key and a.value > 'val_9')
+;
+
+select * from cv1
+;
+
+-- sq in from
+select *
+from (select *
+ from src_cbo b
+ where exists
+ (select a.key
+ from src_cbo a
+ where b.value = a.value and a.key = b.key and a.value > 'val_9')
+ ) a
+;
+
+-- sq in from, having
+select *
+from (select b.key, count(*)
+ from src_cbo b
+ group by b.key
+ having exists
+ (select a.key
+ from src_cbo a
+ where a.key = b.key and a.value > 'val_9'
+ )
+) a
+;
Modified: hive/branches/cbo/ql/src/test/results/clientpositive/cbo_correctness.q.out
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/results/clientpositive/cbo_correctness.q.out?rev=1620377&r1=1620376&r2=1620377&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/results/clientpositive/cbo_correctness.q.out (original)
+++ hive/branches/cbo/ql/src/test/results/clientpositive/cbo_correctness.q.out Mon Aug 25 17:16:07 2014
@@ -60,6 +60,103 @@ POSTHOOK: query: load data local inpath
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@t3
+PREHOOK: query: CREATE TABLE part(
+ p_partkey INT,
+ p_name STRING,
+ p_mfgr STRING,
+ p_brand STRING,
+ p_type STRING,
+ p_size INT,
+ p_container STRING,
+ p_retailprice DOUBLE,
+ p_comment STRING
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@part
+POSTHOOK: query: CREATE TABLE part(
+ p_partkey INT,
+ p_name STRING,
+ p_mfgr STRING,
+ p_brand STRING,
+ p_type STRING,
+ p_size INT,
+ p_container STRING,
+ p_retailprice DOUBLE,
+ p_comment STRING
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@part
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@part
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@part
+PREHOOK: query: DROP TABLE lineitem
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE lineitem
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT,
+ L_PARTKEY INT,
+ L_SUPPKEY INT,
+ L_LINENUMBER INT,
+ L_QUANTITY DOUBLE,
+ L_EXTENDEDPRICE DOUBLE,
+ L_DISCOUNT DOUBLE,
+ L_TAX DOUBLE,
+ L_RETURNFLAG STRING,
+ L_LINESTATUS STRING,
+ l_shipdate STRING,
+ L_COMMITDATE STRING,
+ L_RECEIPTDATE STRING,
+ L_SHIPINSTRUCT STRING,
+ L_SHIPMODE STRING,
+ L_COMMENT STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@lineitem
+POSTHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT,
+ L_PARTKEY INT,
+ L_SUPPKEY INT,
+ L_LINENUMBER INT,
+ L_QUANTITY DOUBLE,
+ L_EXTENDEDPRICE DOUBLE,
+ L_DISCOUNT DOUBLE,
+ L_TAX DOUBLE,
+ L_RETURNFLAG STRING,
+ L_LINESTATUS STRING,
+ l_shipdate STRING,
+ L_COMMITDATE STRING,
+ L_RECEIPTDATE STRING,
+ L_SHIPINSTRUCT STRING,
+ L_SHIPMODE STRING,
+ L_COMMENT STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@lineitem
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@lineitem
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@lineitem
+PREHOOK: query: create table src_cbo as select * from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: create table src_cbo as select * from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@src_cbo
PREHOOK: query: analyze table t1 partition (dt) compute statistics
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
@@ -120,6 +217,54 @@ POSTHOOK: query: analyze table t3 comput
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t3
#### A masked pattern was here ####
+PREHOOK: query: analyze table src_cbo compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_cbo
+PREHOOK: Output: default@src_cbo
+POSTHOOK: query: analyze table src_cbo compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_cbo
+POSTHOOK: Output: default@src_cbo
+PREHOOK: query: analyze table src_cbo compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table src_cbo compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+PREHOOK: query: analyze table part compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+PREHOOK: Output: default@part
+POSTHOOK: query: analyze table part compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+POSTHOOK: Output: default@part
+PREHOOK: query: analyze table part compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table part compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+PREHOOK: query: analyze table lineitem compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@lineitem
+PREHOOK: Output: default@lineitem
+POSTHOOK: query: analyze table lineitem compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@lineitem
+POSTHOOK: Output: default@lineitem
+PREHOOK: query: analyze table lineitem compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@lineitem
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table lineitem compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@lineitem
+#### A masked pattern was here ####
PREHOOK: query: -- 1. Test Select + TS
select * from t1
PREHOOK: type: QUERY
@@ -17911,3 +18056,1122 @@ POSTHOOK: Input: default@t2
POSTHOOK: Input: default@t2@dt=2014
#### A masked pattern was here ####
400
+Warning: Shuffle Join JOIN[24][tables = [$hdt$_519, $hdt$_524]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: -- 16. SubQueries Not In
+-- non agg, non corr
+select *
+from src_cbo
+where src_cbo.key not in
+ ( select key from src_cbo s1
+ where s1.key > '2'
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+POSTHOOK: query: -- 16. SubQueries Not In
+-- non agg, non corr
+select *
+from src_cbo
+where src_cbo.key not in
+ ( select key from src_cbo s1
+ where s1.key > '2'
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+10 val_10
+100 val_100
+100 val_100
+103 val_103
+103 val_103
+104 val_104
+104 val_104
+105 val_105
+11 val_11
+111 val_111
+113 val_113
+113 val_113
+114 val_114
+116 val_116
+118 val_118
+118 val_118
+119 val_119
+119 val_119
+119 val_119
+12 val_12
+12 val_12
+120 val_120
+120 val_120
+125 val_125
+125 val_125
+126 val_126
+128 val_128
+128 val_128
+128 val_128
+129 val_129
+129 val_129
+131 val_131
+133 val_133
+134 val_134
+134 val_134
+136 val_136
+137 val_137
+137 val_137
+138 val_138
+138 val_138
+138 val_138
+138 val_138
+143 val_143
+145 val_145
+146 val_146
+146 val_146
+149 val_149
+149 val_149
+15 val_15
+15 val_15
+150 val_150
+152 val_152
+152 val_152
+153 val_153
+155 val_155
+156 val_156
+157 val_157
+158 val_158
+160 val_160
+162 val_162
+163 val_163
+164 val_164
+164 val_164
+165 val_165
+165 val_165
+166 val_166
+167 val_167
+167 val_167
+167 val_167
+168 val_168
+169 val_169
+169 val_169
+169 val_169
+169 val_169
+17 val_17
+170 val_170
+172 val_172
+172 val_172
+174 val_174
+174 val_174
+175 val_175
+175 val_175
+176 val_176
+176 val_176
+177 val_177
+178 val_178
+179 val_179
+179 val_179
+18 val_18
+18 val_18
+180 val_180
+181 val_181
+183 val_183
+186 val_186
+187 val_187
+187 val_187
+187 val_187
+189 val_189
+19 val_19
+190 val_190
+191 val_191
+191 val_191
+192 val_192
+193 val_193
+193 val_193
+193 val_193
+194 val_194
+195 val_195
+195 val_195
+196 val_196
+197 val_197
+197 val_197
+199 val_199
+199 val_199
+199 val_199
+2 val_2
+Warning: Shuffle Join JOIN[26][tables = [$hdt$_530, $hdt$_536]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: -- non agg, corr
+select p_mfgr, b.p_name, p_size
+from part b
+where b.p_name not in
+ (select p_name
+ from (select p_mfgr, p_name, p_size as r from part) a
+ where r < 10 and b.p_mfgr = a.p_mfgr
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- non agg, corr
+select p_mfgr, b.p_name, p_size
+from part b
+where b.p_name not in
+ (select p_name
+ from (select p_mfgr, p_name, p_size as r from part) a
+ where r < 10 and b.p_mfgr = a.p_mfgr
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#5 almond antique blue firebrick mint 31
+Manufacturer#3 almond antique chartreuse khaki white 17
+Manufacturer#1 almond antique chartreuse lavender yellow 34
+Manufacturer#3 almond antique forest lavender goldenrod 14
+Manufacturer#4 almond antique gainsboro frosted violet 10
+Manufacturer#3 almond antique metallic orange dim 19
+Manufacturer#3 almond antique olive coral navajo 45
+Manufacturer#2 almond antique violet chocolate turquoise 14
+Manufacturer#4 almond antique violet mint lemon 39
+Manufacturer#2 almond antique violet turquoise frosted 40
+Manufacturer#1 almond aquamarine burnished black steel 28
+Manufacturer#5 almond aquamarine dodger light gainsboro 46
+Manufacturer#4 almond aquamarine floral ivory bisque 27
+Manufacturer#1 almond aquamarine pink moccasin thistle 42
+Manufacturer#2 almond aquamarine rose maroon antique 25
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18
+Manufacturer#4 almond azure aquamarine papaya violet 12
+Manufacturer#5 almond azure blanched chiffon midnight 23
+Warning: Shuffle Join JOIN[39][tables = [$hdt$_543, $hdt$_547, $hdt$_554]] in Stage 'Stage-3:MAPRED' is a cross product
+PREHOOK: query: -- agg, non corr
+select p_name, p_size
+from
+part where part.p_size not in
+ (select avg(p_size)
+ from (select p_size from part) a
+ where p_size < 10
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- agg, non corr
+select p_name, p_size
+from
+part where part.p_size not in
+ (select avg(p_size)
+ from (select p_size from part) a
+ where p_size < 10
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+almond aquamarine dodger light gainsboro 46
+almond antique olive coral navajo 45
+almond aquamarine pink moccasin thistle 42
+almond antique violet turquoise frosted 40
+almond antique violet mint lemon 39
+almond antique chartreuse lavender yellow 34
+almond antique blue firebrick mint 31
+almond aquamarine burnished black steel 28
+almond aquamarine floral ivory bisque 27
+almond aquamarine rose maroon antique 25
+almond azure blanched chiffon midnight 23
+almond antique metallic orange dim 19
+almond aquamarine sandy cyan gainsboro 18
+almond antique chartreuse khaki white 17
+almond antique violet chocolate turquoise 14
+almond antique forest lavender goldenrod 14
+almond azure aquamarine papaya violet 12
+almond antique gainsboro frosted violet 10
+almond aquamarine yellow dodger mint 7
+almond antique salmon chartreuse burlywood 6
+almond antique medium spring khaki 6
+almond antique burnished rose metallic 2
+almond aquamarine midnight light salmon 2
+almond antique sky peru orange 2
+almond antique burnished rose metallic 2
+almond antique misty red olive 1
+Warning: Shuffle Join JOIN[36][tables = [$hdt$_558, $hdt$_565]] in Stage 'Stage-3:MAPRED' is a cross product
+PREHOOK: query: -- agg, corr
+select p_mfgr, p_name, p_size
+from part b where b.p_size not in
+ (select min(p_size)
+ from (select p_mfgr, p_size from part) a
+ where p_size < 10 and b.p_mfgr = a.p_mfgr
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- agg, corr
+select p_mfgr, p_name, p_size
+from part b where b.p_size not in
+ (select min(p_size)
+ from (select p_mfgr, p_size from part) a
+ where p_size < 10 and b.p_mfgr = a.p_mfgr
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1 almond antique salmon chartreuse burlywood 6
+Manufacturer#5 almond antique medium spring khaki 6
+Manufacturer#4 almond antique gainsboro frosted violet 10
+Manufacturer#4 almond azure aquamarine papaya violet 12
+Manufacturer#2 almond antique violet chocolate turquoise 14
+Manufacturer#3 almond antique forest lavender goldenrod 14
+Manufacturer#3 almond antique chartreuse khaki white 17
+Manufacturer#2 almond aquamarine sandy cyan gainsboro 18
+Manufacturer#3 almond antique metallic orange dim 19
+Manufacturer#5 almond azure blanched chiffon midnight 23
+Manufacturer#2 almond aquamarine rose maroon antique 25
+Manufacturer#4 almond aquamarine floral ivory bisque 27
+Manufacturer#1 almond aquamarine burnished black steel 28
+Manufacturer#5 almond antique blue firebrick mint 31
+Manufacturer#1 almond antique chartreuse lavender yellow 34
+Manufacturer#4 almond antique violet mint lemon 39
+Manufacturer#2 almond antique violet turquoise frosted 40
+Manufacturer#1 almond aquamarine pink moccasin thistle 42
+Manufacturer#3 almond antique olive coral navajo 45
+Manufacturer#5 almond aquamarine dodger light gainsboro 46
+Warning: Shuffle Join JOIN[24][tables = [$hdt$_573, $hdt$_578]] in Stage 'Stage-3:MAPRED' is a cross product
+PREHOOK: query: -- non agg, non corr, Group By in Parent Query
+select li.l_partkey, count(*)
+from lineitem li
+where li.l_linenumber = 1 and
+ li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR')
+group by li.l_partkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@lineitem
+#### A masked pattern was here ####
+POSTHOOK: query: -- non agg, non corr, Group By in Parent Query
+select li.l_partkey, count(*)
+from lineitem li
+where li.l_linenumber = 1 and
+ li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR')
+group by li.l_partkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@lineitem
+#### A masked pattern was here ####
+450 1
+7068 1
+21636 1
+22630 1
+59694 1
+61931 1
+85951 1
+88035 1
+88362 1
+106170 1
+119477 1
+119767 1
+123076 1
+139636 1
+175839 1
+182052 1
+Warning: Shuffle Join JOIN[41][tables = [$hdt$_586, $hdt$_593]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: -- add null check test from sq_notin.q once HIVE-7721 resolved.
+
+-- non agg, corr, having
+select b.p_mfgr, min(p_retailprice)
+from part b
+group by b.p_mfgr
+having b.p_mfgr not in
+ (select p_mfgr
+ from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a
+ where min(p_retailprice) = l and r - l > 600
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- add null check test from sq_notin.q once HIVE-7721 resolved.
+
+-- non agg, corr, having
+select b.p_mfgr, min(p_retailprice)
+from part b
+group by b.p_mfgr
+having b.p_mfgr not in
+ (select p_mfgr
+ from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a
+ where min(p_retailprice) = l and r - l > 600
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1 1173.15
+Manufacturer#2 1690.68
+Warning: Shuffle Join JOIN[39][tables = [$hdt$_602, $hdt$_608]] in Stage 'Stage-3:MAPRED' is a cross product
+PREHOOK: query: -- agg, non corr, having
+select b.p_mfgr, min(p_retailprice)
+from part b
+group by b.p_mfgr
+having b.p_mfgr not in
+ (select p_mfgr
+ from part a
+ group by p_mfgr
+ having max(p_retailprice) - min(p_retailprice) > 600
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- agg, non corr, having
+select b.p_mfgr, min(p_retailprice)
+from part b
+group by b.p_mfgr
+having b.p_mfgr not in
+ (select p_mfgr
+ from part a
+ group by p_mfgr
+ having max(p_retailprice) - min(p_retailprice) > 600
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1 1173.15
+Manufacturer#2 1690.68
+PREHOOK: query: -- 17. SubQueries In
+-- non agg, non corr
+select *
+from src_cbo
+where src_cbo.key in (select key from src_cbo s1 where s1.key > '9')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+POSTHOOK: query: -- 17. SubQueries In
+-- non agg, non corr
+select *
+from src_cbo
+where src_cbo.key in (select key from src_cbo s1 where s1.key > '9')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+90 val_90
+90 val_90
+90 val_90
+92 val_92
+95 val_95
+95 val_95
+96 val_96
+97 val_97
+97 val_97
+98 val_98
+98 val_98
+PREHOOK: query: -- agg, corr
+-- add back once rank issue fixed for cbo
+
+-- distinct, corr
+select *
+from src_cbo b
+where b.key in
+ (select distinct a.key
+ from src_cbo a
+ where b.value = a.value and a.key > '9'
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+POSTHOOK: query: -- agg, corr
+-- add back once rank issue fixed for cbo
+
+-- distinct, corr
+select *
+from src_cbo b
+where b.key in
+ (select distinct a.key
+ from src_cbo a
+ where b.value = a.value and a.key > '9'
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+90 val_90
+90 val_90
+90 val_90
+92 val_92
+95 val_95
+95 val_95
+96 val_96
+97 val_97
+97 val_97
+98 val_98
+98 val_98
+PREHOOK: query: -- non agg, corr, with join in Parent Query
+select p.p_partkey, li.l_suppkey
+from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
+where li.l_linenumber = 1 and
+ li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@lineitem
+#### A masked pattern was here ####
+POSTHOOK: query: -- non agg, corr, with join in Parent Query
+select p.p_partkey, li.l_suppkey
+from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
+where li.l_linenumber = 1 and
+ li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@lineitem
+#### A masked pattern was here ####
+4297 1798
+108570 8571
+PREHOOK: query: -- where and having
+-- Plan is:
+-- Stage 1: b semijoin sq1:src_cbo (subquery in where)
+-- Stage 2: group by Stage 1 o/p
+-- Stage 5: group by on sq2:src_cbo (subquery in having)
+-- Stage 6: Stage 2 o/p semijoin Stage 5
+explain
+select key, value, count(*)
+from src_cbo b
+where b.key in (select key from src_cbo where src_cbo.key > '8')
+group by key, value
+having count(*) in (select count(*) from src_cbo s1 where s1.key > '9' group by s1.key )
+PREHOOK: type: QUERY
+POSTHOOK: query: -- where and having
+-- Plan is:
+-- Stage 1: b semijoin sq1:src_cbo (subquery in where)
+-- Stage 2: group by Stage 1 o/p
+-- Stage 5: group by on sq2:src_cbo (subquery in having)
+-- Stage 6: Stage 2 o/p semijoin Stage 5
+explain
+select key, value, count(*)
+from src_cbo b
+where b.key in (select key from src_cbo where src_cbo.key > '8')
+group by key, value
+having count(*) in (select count(*) from src_cbo s1 where s1.key > '9' group by s1.key )
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-2, Stage-4
+ Stage-4 is a root stage
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: src_cbo
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((key > '8') and key is not null) (type: boolean)
+ Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ condition expressions:
+ 0 {KEY.reducesinkkey0} {VALUE._col0}
+ 1
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1202 Data size: 213956 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1202 Data size: 213956 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1202 Data size: 223572 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1202 Data size: 223572 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: _col2 is not null (type: boolean)
+ Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col2 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: bigint)
+ Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1} {KEY.reducesinkkey0}
+ 1
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: s1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (key > '9') (type: boolean)
+ Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 166 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 166 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 83 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: _col1 is not null (type: boolean)
+ Statistics: Num rows: 83 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- non agg, non corr, windowing
+explain
+select p_mfgr, p_name, avg(p_size)
+from part
+group by p_mfgr, p_name
+having p_name in
+ (select first_value(p_name) over(partition by p_mfgr order by p_size) from part)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- non agg, non corr, windowing
+explain
+select p_mfgr, p_name, avg(p_size)
+from part
+group by p_mfgr, p_name
+having p_name in
+ (select first_value(p_name) over(partition by p_mfgr order by p_size) from part)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ Reduce Operator Tree:
+ Extract
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: _wcol0 is not null (type: boolean)
+ Statistics: Num rows: 13 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Select Operator
+ expressions: _wcol0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 13 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col2 (type: double)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1}
+ 1
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 14 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 14 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 14 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: p_name is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: avg(_col2)
+ keys: _col0 (type: string), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ value expressions: _col2 (type: struct<count:bigint,sum:double,input:int>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: avg(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: -- 18. SubQueries Not Exists
+-- distinct, corr
+select *
+from src_cbo b
+where not exists
+ (select distinct a.key
+ from src_cbo a
+ where b.value = a.value and a.value > 'val_2'
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+POSTHOOK: query: -- 18. SubQueries Not Exists
+-- distinct, corr
+select *
+from src_cbo b
+where not exists
+ (select distinct a.key
+ from src_cbo a
+ where b.value = a.value and a.value > 'val_2'
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+10 val_10
+100 val_100
+100 val_100
+103 val_103
+103 val_103
+104 val_104
+104 val_104
+105 val_105
+11 val_11
+111 val_111
+113 val_113
+113 val_113
+114 val_114
+116 val_116
+118 val_118
+118 val_118
+119 val_119
+119 val_119
+119 val_119
+12 val_12
+12 val_12
+120 val_120
+120 val_120
+125 val_125
+125 val_125
+126 val_126
+128 val_128
+128 val_128
+128 val_128
+129 val_129
+129 val_129
+131 val_131
+133 val_133
+134 val_134
+134 val_134
+136 val_136
+137 val_137
+137 val_137
+138 val_138
+138 val_138
+138 val_138
+138 val_138
+143 val_143
+145 val_145
+146 val_146
+146 val_146
+149 val_149
+149 val_149
+15 val_15
+15 val_15
+150 val_150
+152 val_152
+152 val_152
+153 val_153
+155 val_155
+156 val_156
+157 val_157
+158 val_158
+160 val_160
+162 val_162
+163 val_163
+164 val_164
+164 val_164
+165 val_165
+165 val_165
+166 val_166
+167 val_167
+167 val_167
+167 val_167
+168 val_168
+169 val_169
+169 val_169
+169 val_169
+169 val_169
+17 val_17
+170 val_170
+172 val_172
+172 val_172
+174 val_174
+174 val_174
+175 val_175
+175 val_175
+176 val_176
+176 val_176
+177 val_177
+178 val_178
+179 val_179
+179 val_179
+18 val_18
+18 val_18
+180 val_180
+181 val_181
+183 val_183
+186 val_186
+187 val_187
+187 val_187
+187 val_187
+189 val_189
+19 val_19
+190 val_190
+191 val_191
+191 val_191
+192 val_192
+193 val_193
+193 val_193
+193 val_193
+194 val_194
+195 val_195
+195 val_195
+196 val_196
+197 val_197
+197 val_197
+199 val_199
+199 val_199
+199 val_199
+2 val_2
+PREHOOK: query: -- no agg, corr, having
+select *
+from src_cbo b
+group by key, value
+having not exists
+ (select a.key
+ from src_cbo a
+ where b.value = a.value and a.key = b.key and a.value > 'val_12'
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+POSTHOOK: query: -- no agg, corr, having
+select *
+from src_cbo b
+group by key, value
+having not exists
+ (select a.key
+ from src_cbo a
+ where b.value = a.value and a.key = b.key and a.value > 'val_12'
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+0 val_0
+10 val_10
+100 val_100
+103 val_103
+104 val_104
+105 val_105
+11 val_11
+111 val_111
+113 val_113
+114 val_114
+116 val_116
+118 val_118
+119 val_119
+12 val_12
+PREHOOK: query: -- 19. SubQueries Exists
+-- view test
+create view cv1 as
+select *
+from src_cbo b
+where exists
+ (select a.key
+ from src_cbo a
+ where b.value = a.value and a.key = b.key and a.value > 'val_9')
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@src_cbo
+POSTHOOK: query: -- 19. SubQueries Exists
+-- view test
+create view cv1 as
+select *
+from src_cbo b
+where exists
+ (select a.key
+ from src_cbo a
+ where b.value = a.value and a.key = b.key and a.value > 'val_9')
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@src_cbo
+POSTHOOK: Output: default@cv1
+PREHOOK: query: select * from cv1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cv1
+PREHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+POSTHOOK: query: select * from cv1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cv1
+POSTHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+90 val_90
+90 val_90
+90 val_90
+92 val_92
+95 val_95
+95 val_95
+96 val_96
+97 val_97
+97 val_97
+98 val_98
+98 val_98
+PREHOOK: query: -- sq in from
+select *
+from (select *
+ from src_cbo b
+ where exists
+ (select a.key
+ from src_cbo a
+ where b.value = a.value and a.key = b.key and a.value > 'val_9')
+ ) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+POSTHOOK: query: -- sq in from
+select *
+from (select *
+ from src_cbo b
+ where exists
+ (select a.key
+ from src_cbo a
+ where b.value = a.value and a.key = b.key and a.value > 'val_9')
+ ) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+90 val_90
+90 val_90
+90 val_90
+92 val_92
+95 val_95
+95 val_95
+96 val_96
+97 val_97
+97 val_97
+98 val_98
+98 val_98
+PREHOOK: query: -- sq in from, having
+select *
+from (select b.key, count(*)
+ from src_cbo b
+ group by b.key
+ having exists
+ (select a.key
+ from src_cbo a
+ where a.key = b.key and a.value > 'val_9'
+ )
+) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+POSTHOOK: query: -- sq in from, having
+select *
+from (select b.key, count(*)
+ from src_cbo b
+ group by b.key
+ having exists
+ (select a.key
+ from src_cbo a
+ where a.key = b.key and a.value > 'val_9'
+ )
+) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+90 3
+92 1
+95 2
+96 1
+97 2
+98 2