You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2014/08/25 19:16:07 UTC

svn commit: r1620377 - in /hive/branches/cbo/ql/src: java/org/apache/hadoop/hive/ql/optimizer/optiq/ java/org/apache/hadoop/hive/ql/parse/ test/queries/clientpositive/ test/results/clientpositive/

Author: hashutosh
Date: Mon Aug 25 17:16:07 2014
New Revision: 1620377

URL: http://svn.apache.org/r1620377
Log:
HIVE-7724 : CBO: support Subquery predicates (Harish Butani via Ashutosh Chauhan)

Modified:
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hive/branches/cbo/ql/src/test/queries/clientpositive/cbo_correctness.q
    hive/branches/cbo/ql/src/test/results/clientpositive/cbo_correctness.q.out

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java?rev=1620377&r1=1620376&r2=1620377&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/HiveOptiqUtil.java Mon Aug 25 17:16:07 2014
@@ -131,8 +131,8 @@ public class HiveOptiqUtil {
 
     for (i = 0; i < origColEqConds.size(); i++) {
       Pair<Integer, Integer> p = origColEqConds.get(i);
-      RexNode leftKey = leftJoinKeys.get(p.left);
-      RexNode rightKey = rightJoinKeys.get(p.right);
+      RexNode leftKey = leftJoinKeys.get(i);
+      RexNode rightKey = rightJoinKeys.get(i);
       leftKeys.add(p.left);
       rightKeys.add(p.right);
       RexNode cond = rexBuilder.makeCall(

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java?rev=1620377&r1=1620376&r2=1620377&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java Mon Aug 25 17:16:07 2014
@@ -355,12 +355,16 @@ public class RowResolver implements Seri
   // TODO: 1) How to handle collisions? 2) Should we be cloning ColumnInfo or
   // not?
   public static int add(RowResolver rrToAddTo, RowResolver rrToAddFrom,
-      int outputColPos) throws SemanticException {
+      int outputColPos, int numColumns) throws SemanticException {
     String tabAlias;
     String colAlias;
     String[] qualifiedColName;
+    int i = 0;
 
     for (ColumnInfo cInfoFrmInput : rrToAddFrom.getRowSchema().getSignature()) {
+      if ( numColumns >= 0 && i == numColumns ) {
+        break;
+      }
       ColumnInfo newCI = null;
       qualifiedColName = rrToAddFrom.getInvRslvMap().get(
           cInfoFrmInput.getInternalName());
@@ -377,11 +381,25 @@ public class RowResolver implements Seri
         throw new RuntimeException("Ambigous Column Names");
 
       rrToAddTo.put(tabAlias, colAlias, newCI);
+
+      qualifiedColName = rrToAddFrom.getAlternateMappings(cInfoFrmInput
+          .getInternalName());
+      if (qualifiedColName != null) {
+        tabAlias = qualifiedColName[0];
+        colAlias = qualifiedColName[1];
+        rrToAddTo.put(tabAlias, colAlias, newCI);
+      }
+      i++;
     }
 
     return outputColPos;
 	}
 
+  public static int add(RowResolver rrToAddTo, RowResolver rrToAddFrom,
+      int outputColPos) throws SemanticException {
+    return add(rrToAddTo, rrToAddFrom, outputColPos, -1);
+  }
+
 	/**
 	 * Return a new row resolver that is combination of left RR and right RR.
 	 * The schema will be schema of left, schema of right

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1620377&r1=1620376&r2=1620377&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Mon Aug 25 17:16:07 2014
@@ -12374,14 +12374,197 @@ public class SemanticAnalyzer extends Ba
       return filterRel;
     }
 
-    private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel)
+    private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel,
+        Map<String, RelNode> aliasToRel, boolean forHavingClause)
+        throws SemanticException {
+      /*
+       * Handle Subquery predicates.
+       *
+       * Notes (8/22/14 hb):
+       * Why is this a copy of the code from {@link #genFilterPlan}
+       * - for now we will support the same behavior as non CBO route.
+       * - but plan to allow nested SubQueries(Restriction.9.m) and
+       *   multiple SubQuery expressions(Restriction.8.m). This
+       *   requires use to utilize Optiq's Decorrelation mechanics,
+       *   and for Optiq to fix/flush out Null semantics(OPTIQ-373)
+       * - besides only the driving code has been copied. Most of
+       *   the code which is SubQueryUtils and QBSubQuery is reused.
+       *
+       */
+      int numSrcColumns = srcRel.getRowType().getFieldCount();
+      List<ASTNode> subQueriesInOriginalTree = SubQueryUtils
+          .findSubQueries(searchCond);
+      if (subQueriesInOriginalTree.size() > 0) {
+
+        /*
+         * Restriction.9.m :: disallow nested SubQuery expressions.
+         */
+        if (qb.getSubQueryPredicateDef() != null) {
+          throw new SemanticException(
+              ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
+                  subQueriesInOriginalTree.get(0),
+                  "Nested SubQuery expressions are not supported."));
+        }
+
+        /*
+         * Restriction.8.m :: We allow only 1 SubQuery expression per Query.
+         */
+        if (subQueriesInOriginalTree.size() > 1) {
+
+          throw new SemanticException(
+              ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
+                  subQueriesInOriginalTree.get(1),
+                  "Only 1 SubQuery expression is supported."));
+        }
+
+        /*
+         * Clone the Search AST; apply all rewrites on the clone.
+         */
+        ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor
+            .dupTree(searchCond);
+        List<ASTNode> subQueries = SubQueryUtils
+            .findSubQueries(clonedSearchCond);
+
+        RowResolver inputRR = m_relToHiveRR.get(srcRel);
+
+        for (int i = 0; i < subQueries.size(); i++) {
+          ASTNode subQueryAST = subQueries.get(i);
+          ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i);
+
+          int sqIdx = qb.incrNumSubQueryPredicates();
+          clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(
+              clonedSearchCond, subQueryAST);
+
+          QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx,
+              subQueryAST, originalSubQueryAST, ctx);
+
+          if (!forHavingClause) {
+            qb.setWhereClauseSubQueryPredicate(subQuery);
+          } else {
+            qb.setHavingClauseSubQueryPredicate(subQuery);
+          }
+          String havingInputAlias = null;
+
+          if (forHavingClause) {
+            havingInputAlias = "gby_sq" + sqIdx;
+            aliasToRel.put(havingInputAlias, srcRel);
+          }
+
+          subQuery.validateAndRewriteAST(inputRR, forHavingClause,
+              havingInputAlias, aliasToRel.keySet());
+
+          QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(),
+              true);
+          qbSQ.setSubQueryDef(subQuery.getSubQuery());
+          Phase1Ctx ctx_1 = initPhase1Ctx();
+          doPhase1(subQuery.getSubQueryAST(), qbSQ, ctx_1);
+          getMetaData(qbSQ);
+          RelNode subQueryRelNode = genLogicalPlan(qbSQ);
+          aliasToRel.put(subQuery.getAlias(), subQueryRelNode);
+          RowResolver sqRR = m_relToHiveRR.get(subQueryRelNode);
+
+          /*
+           * Check.5.h :: For In and Not In the SubQuery must implicitly or
+           * explicitly only contain one select item.
+           */
+          if (subQuery.getOperator().getType() != SubQueryType.EXISTS
+              && subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS
+              && sqRR.getColumnInfos().size()
+                  - subQuery.getNumOfCorrelationExprsAddedToSQSelect() > 1) {
+            throw new SemanticException(
+                ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(subQueryAST,
+                    "SubQuery can contain only 1 item in Select List."));
+          }
+
+          /*
+           * If this is a Not In SubQuery Predicate then Join in the Null Check
+           * SubQuery. See QBSubQuery.NotInCheck for details on why and how this
+           * is constructed.
+           */
+          if (subQuery.getNotInCheck() != null) {
+            QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck();
+            notInCheck.setSQRR(sqRR);
+            QB qbSQ_nic = new QB(subQuery.getOuterQueryId(),
+                notInCheck.getAlias(), true);
+            qbSQ_nic.setSubQueryDef(notInCheck.getSubQuery());
+            ctx_1 = initPhase1Ctx();
+            doPhase1(notInCheck.getSubQueryAST(), qbSQ_nic, ctx_1);
+            getMetaData(qbSQ_nic);
+            RelNode subQueryNICRelNode = genLogicalPlan(qbSQ_nic);
+            aliasToRel.put(notInCheck.getAlias(), subQueryNICRelNode);
+            srcRel = genJoinRelNode(srcRel, subQueryNICRelNode,
+            // set explicitly to inner until we figure out SemiJoin use
+            // notInCheck.getJoinType(),
+                JoinType.INNER, notInCheck.getJoinConditionAST());
+            inputRR = m_relToHiveRR.get(srcRel);
+            if (forHavingClause) {
+              aliasToRel.put(havingInputAlias, srcRel);
+            }
+          }
+
+          /*
+           * Gen Join between outer Operator and SQ op
+           */
+          subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause,
+              havingInputAlias);
+          srcRel = genJoinRelNode(srcRel, subQueryRelNode,
+              subQuery.getJoinType(), subQuery.getJoinConditionAST());
+          searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond);
+
+          srcRel = genFilterRelNode(searchCond, srcRel);
+
+          /*
+           * For Not Exists and Not In, add a projection on top of the Left
+           * Outer Join.
+           */
+          if (subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS
+              || subQuery.getOperator().getType() != SubQueryType.NOT_IN) {
+            srcRel = projectLeftOuterSide(srcRel, numSrcColumns);
+          }
+        }
+        return srcRel;
+      }
+
+      return genFilterRelNode(searchCond, srcRel);
+    }
+
+    private RelNode projectLeftOuterSide(RelNode srcRel, int numColumns)
+        throws SemanticException {
+      RowResolver iRR = m_relToHiveRR.get(srcRel);
+      RowResolver oRR = new RowResolver();
+      RowResolver.add(oRR, iRR, 0, numColumns);
+
+      List<RexNode> optiqColLst = new ArrayList<RexNode>();
+      List<String> oFieldNames = new ArrayList<String>();
+      RelDataType iType = srcRel.getRowType();
+
+      for (int i = 0; i < iType.getFieldCount(); i++) {
+        RelDataTypeField fType = iType.getFieldList().get(i);
+        String fName = iType.getFieldNames().get(i);
+        optiqColLst.add(m_cluster.getRexBuilder().makeInputRef(fType.getType(),
+            i));
+        oFieldNames.add(fName);
+      }
+
+      HiveRel selRel = HiveProjectRel.create(srcRel, optiqColLst, oFieldNames);
+
+      this.m_relToHiveColNameOptiqPosMap.put(selRel,
+          buildHiveToOptiqColumnMap(oRR, selRel));
+      this.m_relToHiveRR.put(selRel, oRR);
+      return selRel;
+    }
+
+    private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel,
+        Map<String, RelNode> aliasToRel,
+        boolean forHavingClause)
         throws SemanticException {
       RelNode filterRel = null;
 
       Iterator<ASTNode> whereClauseIterator = getQBParseInfo(qb)
           .getDestToWhereExpr().values().iterator();
       if (whereClauseIterator.hasNext()) {
-        filterRel = genFilterRelNode((ASTNode) whereClauseIterator.next().getChild(0), srcRel);
+        filterRel = genFilterRelNode(qb, (ASTNode) whereClauseIterator.next().getChild(0),
+            srcRel, aliasToRel, forHavingClause);
       }
 
       return filterRel;
@@ -13263,7 +13446,7 @@ public class SemanticAnalyzer extends Ba
       }
 
       // 2. Build Rel for where Clause
-      filterRel = genFilterLogicalPlan(qb, srcRel);
+      filterRel = genFilterLogicalPlan(qb, srcRel, aliasToRel, false);
       srcRel = (filterRel == null) ? srcRel : filterRel;
 
       // 3. Build Rel for GB Clause
@@ -13271,7 +13454,7 @@ public class SemanticAnalyzer extends Ba
       srcRel = (gbRel == null) ? srcRel : gbRel;
 
       // 4. Build Rel for GB Having Clause
-      gbHavingRel = genGBHavingLogicalPlan(qb, srcRel);
+      gbHavingRel = genGBHavingLogicalPlan(qb, srcRel, aliasToRel);
       srcRel = (gbHavingRel == null) ? srcRel : gbHavingRel;
 
       // 5. Build Rel for Select Clause
@@ -13316,7 +13499,8 @@ public class SemanticAnalyzer extends Ba
       return srcRel;
     }
 
-    private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel)
+    private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel,
+        Map<String, RelNode> aliasToRel)
         throws SemanticException {
       RelNode gbFilter = null;
       QBParseInfo qbp = getQBParseInfo(qb);
@@ -13324,7 +13508,7 @@ public class SemanticAnalyzer extends Ba
           .iterator().next());
 
       if (havingClause != null)
-        gbFilter = genFilterRelNode((ASTNode) havingClause.getChild(0), srcRel);
+        gbFilter = genFilterRelNode(qb, (ASTNode) havingClause.getChild(0), srcRel, aliasToRel, true);
 
       return gbFilter;
     }

Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/cbo_correctness.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/cbo_correctness.q?rev=1620377&r1=1620376&r2=1620377&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/cbo_correctness.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/cbo_correctness.q Mon Aug 25 17:16:07 2014
@@ -12,6 +12,45 @@ load data local inpath '../../data/files
 load data local inpath '../../data/files/cbo_t2.txt' into table t2 partition (dt='2014');
 load data local inpath '../../data/files/cbo_t3.txt' into table t3;
 
+CREATE TABLE part( 
+    p_partkey INT,
+    p_name STRING,
+    p_mfgr STRING,
+    p_brand STRING,
+    p_type STRING,
+    p_size INT,
+    p_container STRING,
+    p_retailprice DOUBLE,
+    p_comment STRING
+);
+
+LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part;
+
+DROP TABLE lineitem;
+CREATE TABLE lineitem (L_ORDERKEY      INT,
+                                L_PARTKEY       INT,
+                                L_SUPPKEY       INT,
+                                L_LINENUMBER    INT,
+                                L_QUANTITY      DOUBLE,
+                                L_EXTENDEDPRICE DOUBLE,
+                                L_DISCOUNT      DOUBLE,
+                                L_TAX           DOUBLE,
+                                L_RETURNFLAG    STRING,
+                                L_LINESTATUS    STRING,
+                                l_shipdate      STRING,
+                                L_COMMITDATE    STRING,
+                                L_RECEIPTDATE   STRING,
+                                L_SHIPINSTRUCT  STRING,
+                                L_SHIPMODE      STRING,
+                                L_COMMENT       STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|';
+
+LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem;
+
+create table src_cbo as select * from src;
+
+
 set hive.stats.dbclass=jdbc:derby;
 analyze table t1 partition (dt) compute statistics;
 analyze table t1 compute statistics for columns key, value, c_int, c_float, c_boolean;
@@ -19,6 +58,12 @@ analyze table t2 partition (dt) compute 
 analyze table t2 compute statistics for columns key, value, c_int, c_float, c_boolean;
 analyze table t3 compute statistics;
 analyze table t3 compute statistics for columns key, value, c_int, c_float, c_boolean;
+analyze table src_cbo compute statistics;
+analyze table src_cbo compute statistics for columns;
+analyze table part compute statistics;
+analyze table part compute statistics for columns;
+analyze table lineitem compute statistics;
+analyze table lineitem compute statistics for columns;
 
 set hive.stats.fetch.column.stats=true;
 set hive.auto.convert.join=false;
@@ -223,3 +268,185 @@ select key from t1 where c_int = -6  or 
 
 -- 15. query referencing only partition columns
 select count(t1.dt) from t1 join t2 on t1.dt  = t2.dt  where t1.dt = '2014' ;
+
+-- 16. SubQueries Not In
+-- non agg, non corr
+select * 
+from src_cbo 
+where src_cbo.key not in  
+  ( select key  from src_cbo s1 
+    where s1.key > '2'
+  )
+;
+
+-- non agg, corr
+select p_mfgr, b.p_name, p_size 
+from part b 
+where b.p_name not in 
+  (select p_name 
+  from (select p_mfgr, p_name, p_size as r from part) a 
+  where r < 10 and b.p_mfgr = a.p_mfgr 
+  )
+;
+
+-- agg, non corr
+select p_name, p_size 
+from 
+part where part.p_size not in 
+  (select avg(p_size) 
+  from (select p_size from part) a 
+  where p_size < 10
+  )
+;
+
+-- agg, corr
+select p_mfgr, p_name, p_size 
+from part b where b.p_size not in 
+  (select min(p_size) 
+  from (select p_mfgr, p_size from part) a 
+  where p_size < 10 and b.p_mfgr = a.p_mfgr
+  )
+;
+
+-- non agg, non corr, Group By in Parent Query
+select li.l_partkey, count(*) 
+from lineitem li 
+where li.l_linenumber = 1 and 
+  li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') 
+group by li.l_partkey
+;
+
+-- add null check test from sq_notin.q once HIVE-7721 resolved.
+
+-- non agg, corr, having
+select b.p_mfgr, min(p_retailprice) 
+from part b 
+group by b.p_mfgr
+having b.p_mfgr not in 
+  (select p_mfgr 
+  from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a 
+  where min(p_retailprice) = l and r - l > 600
+  )
+;
+
+-- agg, non corr, having
+select b.p_mfgr, min(p_retailprice) 
+from part b 
+group by b.p_mfgr
+having b.p_mfgr not in 
+  (select p_mfgr 
+  from part a
+  group by p_mfgr
+  having max(p_retailprice) - min(p_retailprice) > 600
+  )
+;
+
+-- 17. SubQueries In
+-- non agg, non corr
+select * 
+from src_cbo 
+where src_cbo.key in (select key from src_cbo s1 where s1.key > '9')
+;
+
+-- agg, corr
+-- add back once rank issue fixed for cbo
+
+-- distinct, corr
+select * 
+from src_cbo b 
+where b.key in
+        (select distinct a.key 
+         from src_cbo a 
+         where b.value = a.value and a.key > '9'
+        )
+;
+
+-- non agg, corr, with join in Parent Query
+select p.p_partkey, li.l_suppkey 
+from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey 
+where li.l_linenumber = 1 and
+ li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
+;
+
+-- where and having
+-- Plan is:
+-- Stage 1: b semijoin sq1:src_cbo (subquery in where)
+-- Stage 2: group by Stage 1 o/p
+-- Stage 5: group by on sq2:src_cbo (subquery in having)
+-- Stage 6: Stage 2 o/p semijoin Stage 5
+explain
+select key, value, count(*) 
+from src_cbo b
+where b.key in (select key from src_cbo where src_cbo.key > '8')
+group by key, value
+having count(*) in (select count(*) from src_cbo s1 where s1.key > '9' group by s1.key )
+;
+
+-- non agg, non corr, windowing
+explain
+select p_mfgr, p_name, avg(p_size) 
+from part 
+group by p_mfgr, p_name
+having p_name in 
+  (select first_value(p_name) over(partition by p_mfgr order by p_size) from part)
+;
+
+-- 18. SubQueries Not Exists
+-- distinct, corr
+select * 
+from src_cbo b 
+where not exists 
+  (select distinct a.key 
+  from src_cbo a 
+  where b.value = a.value and a.value > 'val_2'
+  )
+;
+
+-- no agg, corr, having
+select * 
+from src_cbo b 
+group by key, value
+having not exists 
+  (select a.key 
+  from src_cbo a 
+  where b.value = a.value  and a.key = b.key and a.value > 'val_12'
+  )
+;
+
+-- 19. SubQueries Exists
+-- view test
+create view cv1 as 
+select * 
+from src_cbo b 
+where exists
+  (select a.key 
+  from src_cbo a 
+  where b.value = a.value  and a.key = b.key and a.value > 'val_9')
+;
+
+select * from cv1
+;
+
+-- sq in from
+select * 
+from (select * 
+      from src_cbo b 
+      where exists 
+          (select a.key 
+          from src_cbo a 
+          where b.value = a.value  and a.key = b.key and a.value > 'val_9')
+     ) a
+;
+
+-- sq in from, having
+select *
+from (select b.key, count(*) 
+  from src_cbo b 
+  group by b.key
+  having exists 
+    (select a.key 
+    from src_cbo a 
+    where a.key = b.key and a.value > 'val_9'
+    )
+) a
+;

Modified: hive/branches/cbo/ql/src/test/results/clientpositive/cbo_correctness.q.out
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/results/clientpositive/cbo_correctness.q.out?rev=1620377&r1=1620376&r2=1620377&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/results/clientpositive/cbo_correctness.q.out (original)
+++ hive/branches/cbo/ql/src/test/results/clientpositive/cbo_correctness.q.out Mon Aug 25 17:16:07 2014
@@ -60,6 +60,103 @@ POSTHOOK: query: load data local inpath 
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@t3
+PREHOOK: query: CREATE TABLE part( 
+    p_partkey INT,
+    p_name STRING,
+    p_mfgr STRING,
+    p_brand STRING,
+    p_type STRING,
+    p_size INT,
+    p_container STRING,
+    p_retailprice DOUBLE,
+    p_comment STRING
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@part
+POSTHOOK: query: CREATE TABLE part( 
+    p_partkey INT,
+    p_name STRING,
+    p_mfgr STRING,
+    p_brand STRING,
+    p_type STRING,
+    p_size INT,
+    p_container STRING,
+    p_retailprice DOUBLE,
+    p_comment STRING
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@part
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@part
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@part
+PREHOOK: query: DROP TABLE lineitem
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE lineitem
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE lineitem (L_ORDERKEY      INT,
+                                L_PARTKEY       INT,
+                                L_SUPPKEY       INT,
+                                L_LINENUMBER    INT,
+                                L_QUANTITY      DOUBLE,
+                                L_EXTENDEDPRICE DOUBLE,
+                                L_DISCOUNT      DOUBLE,
+                                L_TAX           DOUBLE,
+                                L_RETURNFLAG    STRING,
+                                L_LINESTATUS    STRING,
+                                l_shipdate      STRING,
+                                L_COMMITDATE    STRING,
+                                L_RECEIPTDATE   STRING,
+                                L_SHIPINSTRUCT  STRING,
+                                L_SHIPMODE      STRING,
+                                L_COMMENT       STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@lineitem
+POSTHOOK: query: CREATE TABLE lineitem (L_ORDERKEY      INT,
+                                L_PARTKEY       INT,
+                                L_SUPPKEY       INT,
+                                L_LINENUMBER    INT,
+                                L_QUANTITY      DOUBLE,
+                                L_EXTENDEDPRICE DOUBLE,
+                                L_DISCOUNT      DOUBLE,
+                                L_TAX           DOUBLE,
+                                L_RETURNFLAG    STRING,
+                                L_LINESTATUS    STRING,
+                                l_shipdate      STRING,
+                                L_COMMITDATE    STRING,
+                                L_RECEIPTDATE   STRING,
+                                L_SHIPINSTRUCT  STRING,
+                                L_SHIPMODE      STRING,
+                                L_COMMENT       STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@lineitem
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@lineitem
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@lineitem
+PREHOOK: query: create table src_cbo as select * from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+POSTHOOK: query: create table src_cbo as select * from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@src_cbo
 PREHOOK: query: analyze table t1 partition (dt) compute statistics
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
@@ -120,6 +217,54 @@ POSTHOOK: query: analyze table t3 comput
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t3
 #### A masked pattern was here ####
+PREHOOK: query: analyze table src_cbo compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_cbo
+PREHOOK: Output: default@src_cbo
+POSTHOOK: query: analyze table src_cbo compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_cbo
+POSTHOOK: Output: default@src_cbo
+PREHOOK: query: analyze table src_cbo compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table src_cbo compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+PREHOOK: query: analyze table part compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+PREHOOK: Output: default@part
+POSTHOOK: query: analyze table part compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+POSTHOOK: Output: default@part
+PREHOOK: query: analyze table part compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table part compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+PREHOOK: query: analyze table lineitem compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@lineitem
+PREHOOK: Output: default@lineitem
+POSTHOOK: query: analyze table lineitem compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@lineitem
+POSTHOOK: Output: default@lineitem
+PREHOOK: query: analyze table lineitem compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@lineitem
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table lineitem compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@lineitem
+#### A masked pattern was here ####
 PREHOOK: query: -- 1. Test Select + TS
 select * from t1
 PREHOOK: type: QUERY
@@ -17911,3 +18056,1122 @@ POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t2@dt=2014
 #### A masked pattern was here ####
 400
+Warning: Shuffle Join JOIN[24][tables = [$hdt$_519, $hdt$_524]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: -- 16. SubQueries Not In
+-- non agg, non corr
+select * 
+from src_cbo 
+where src_cbo.key not in  
+  ( select key  from src_cbo s1 
+    where s1.key > '2'
+  )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+POSTHOOK: query: -- 16. SubQueries Not In
+-- non agg, non corr
+select * 
+from src_cbo 
+where src_cbo.key not in  
+  ( select key  from src_cbo s1 
+    where s1.key > '2'
+  )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+0	val_0
+0	val_0
+0	val_0
+10	val_10
+100	val_100
+100	val_100
+103	val_103
+103	val_103
+104	val_104
+104	val_104
+105	val_105
+11	val_11
+111	val_111
+113	val_113
+113	val_113
+114	val_114
+116	val_116
+118	val_118
+118	val_118
+119	val_119
+119	val_119
+119	val_119
+12	val_12
+12	val_12
+120	val_120
+120	val_120
+125	val_125
+125	val_125
+126	val_126
+128	val_128
+128	val_128
+128	val_128
+129	val_129
+129	val_129
+131	val_131
+133	val_133
+134	val_134
+134	val_134
+136	val_136
+137	val_137
+137	val_137
+138	val_138
+138	val_138
+138	val_138
+138	val_138
+143	val_143
+145	val_145
+146	val_146
+146	val_146
+149	val_149
+149	val_149
+15	val_15
+15	val_15
+150	val_150
+152	val_152
+152	val_152
+153	val_153
+155	val_155
+156	val_156
+157	val_157
+158	val_158
+160	val_160
+162	val_162
+163	val_163
+164	val_164
+164	val_164
+165	val_165
+165	val_165
+166	val_166
+167	val_167
+167	val_167
+167	val_167
+168	val_168
+169	val_169
+169	val_169
+169	val_169
+169	val_169
+17	val_17
+170	val_170
+172	val_172
+172	val_172
+174	val_174
+174	val_174
+175	val_175
+175	val_175
+176	val_176
+176	val_176
+177	val_177
+178	val_178
+179	val_179
+179	val_179
+18	val_18
+18	val_18
+180	val_180
+181	val_181
+183	val_183
+186	val_186
+187	val_187
+187	val_187
+187	val_187
+189	val_189
+19	val_19
+190	val_190
+191	val_191
+191	val_191
+192	val_192
+193	val_193
+193	val_193
+193	val_193
+194	val_194
+195	val_195
+195	val_195
+196	val_196
+197	val_197
+197	val_197
+199	val_199
+199	val_199
+199	val_199
+2	val_2
+Warning: Shuffle Join JOIN[26][tables = [$hdt$_530, $hdt$_536]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: -- non agg, corr
+select p_mfgr, b.p_name, p_size 
+from part b 
+where b.p_name not in 
+  (select p_name 
+  from (select p_mfgr, p_name, p_size as r from part) a 
+  where r < 10 and b.p_mfgr = a.p_mfgr 
+  )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- non agg, corr
+select p_mfgr, b.p_name, p_size 
+from part b 
+where b.p_name not in 
+  (select p_name 
+  from (select p_mfgr, p_name, p_size as r from part) a 
+  where r < 10 and b.p_mfgr = a.p_mfgr 
+  )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#5	almond antique blue firebrick mint	31
+Manufacturer#3	almond antique chartreuse khaki white	17
+Manufacturer#1	almond antique chartreuse lavender yellow	34
+Manufacturer#3	almond antique forest lavender goldenrod	14
+Manufacturer#4	almond antique gainsboro frosted violet	10
+Manufacturer#3	almond antique metallic orange dim	19
+Manufacturer#3	almond antique olive coral navajo	45
+Manufacturer#2	almond antique violet chocolate turquoise	14
+Manufacturer#4	almond antique violet mint lemon	39
+Manufacturer#2	almond antique violet turquoise frosted	40
+Manufacturer#1	almond aquamarine burnished black steel	28
+Manufacturer#5	almond aquamarine dodger light gainsboro	46
+Manufacturer#4	almond aquamarine floral ivory bisque	27
+Manufacturer#1	almond aquamarine pink moccasin thistle	42
+Manufacturer#2	almond aquamarine rose maroon antique	25
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	18
+Manufacturer#4	almond azure aquamarine papaya violet	12
+Manufacturer#5	almond azure blanched chiffon midnight	23
+Warning: Shuffle Join JOIN[39][tables = [$hdt$_543, $hdt$_547, $hdt$_554]] in Stage 'Stage-3:MAPRED' is a cross product
+PREHOOK: query: -- agg, non corr
+select p_name, p_size 
+from 
+part where part.p_size not in 
+  (select avg(p_size) 
+  from (select p_size from part) a 
+  where p_size < 10
+  )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- agg, non corr
+select p_name, p_size 
+from 
+part where part.p_size not in 
+  (select avg(p_size) 
+  from (select p_size from part) a 
+  where p_size < 10
+  )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+almond aquamarine dodger light gainsboro	46
+almond antique olive coral navajo	45
+almond aquamarine pink moccasin thistle	42
+almond antique violet turquoise frosted	40
+almond antique violet mint lemon	39
+almond antique chartreuse lavender yellow	34
+almond antique blue firebrick mint	31
+almond aquamarine burnished black steel	28
+almond aquamarine floral ivory bisque	27
+almond aquamarine rose maroon antique	25
+almond azure blanched chiffon midnight	23
+almond antique metallic orange dim	19
+almond aquamarine sandy cyan gainsboro	18
+almond antique chartreuse khaki white	17
+almond antique violet chocolate turquoise	14
+almond antique forest lavender goldenrod	14
+almond azure aquamarine papaya violet	12
+almond antique gainsboro frosted violet	10
+almond aquamarine yellow dodger mint	7
+almond antique salmon chartreuse burlywood	6
+almond antique medium spring khaki	6
+almond antique burnished rose metallic	2
+almond aquamarine midnight light salmon	2
+almond antique sky peru orange	2
+almond antique burnished rose metallic	2
+almond antique misty red olive	1
+Warning: Shuffle Join JOIN[36][tables = [$hdt$_558, $hdt$_565]] in Stage 'Stage-3:MAPRED' is a cross product
+PREHOOK: query: -- agg, corr
+select p_mfgr, p_name, p_size 
+from part b where b.p_size not in 
+  (select min(p_size) 
+  from (select p_mfgr, p_size from part) a 
+  where p_size < 10 and b.p_mfgr = a.p_mfgr
+  )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- agg, corr
+select p_mfgr, p_name, p_size 
+from part b where b.p_size not in 
+  (select min(p_size) 
+  from (select p_mfgr, p_size from part) a 
+  where p_size < 10 and b.p_mfgr = a.p_mfgr
+  )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	almond antique salmon chartreuse burlywood	6
+Manufacturer#5	almond antique medium spring khaki	6
+Manufacturer#4	almond antique gainsboro frosted violet	10
+Manufacturer#4	almond azure aquamarine papaya violet	12
+Manufacturer#2	almond antique violet chocolate turquoise	14
+Manufacturer#3	almond antique forest lavender goldenrod	14
+Manufacturer#3	almond antique chartreuse khaki white	17
+Manufacturer#2	almond aquamarine sandy cyan gainsboro	18
+Manufacturer#3	almond antique metallic orange dim	19
+Manufacturer#5	almond azure blanched chiffon midnight	23
+Manufacturer#2	almond aquamarine rose maroon antique	25
+Manufacturer#4	almond aquamarine floral ivory bisque	27
+Manufacturer#1	almond aquamarine burnished black steel	28
+Manufacturer#5	almond antique blue firebrick mint	31
+Manufacturer#1	almond antique chartreuse lavender yellow	34
+Manufacturer#4	almond antique violet mint lemon	39
+Manufacturer#2	almond antique violet turquoise frosted	40
+Manufacturer#1	almond aquamarine pink moccasin thistle	42
+Manufacturer#3	almond antique olive coral navajo	45
+Manufacturer#5	almond aquamarine dodger light gainsboro	46
+Warning: Shuffle Join JOIN[24][tables = [$hdt$_573, $hdt$_578]] in Stage 'Stage-3:MAPRED' is a cross product
+PREHOOK: query: -- non agg, non corr, Group By in Parent Query
+select li.l_partkey, count(*) 
+from lineitem li 
+where li.l_linenumber = 1 and 
+  li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') 
+group by li.l_partkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@lineitem
+#### A masked pattern was here ####
+POSTHOOK: query: -- non agg, non corr, Group By in Parent Query
+select li.l_partkey, count(*) 
+from lineitem li 
+where li.l_linenumber = 1 and 
+  li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') 
+group by li.l_partkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@lineitem
+#### A masked pattern was here ####
+450	1
+7068	1
+21636	1
+22630	1
+59694	1
+61931	1
+85951	1
+88035	1
+88362	1
+106170	1
+119477	1
+119767	1
+123076	1
+139636	1
+175839	1
+182052	1
+Warning: Shuffle Join JOIN[41][tables = [$hdt$_586, $hdt$_593]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: -- add null check test from sq_notin.q once HIVE-7721 resolved.
+
+-- non agg, corr, having
+select b.p_mfgr, min(p_retailprice) 
+from part b 
+group by b.p_mfgr
+having b.p_mfgr not in 
+  (select p_mfgr 
+  from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a 
+  where min(p_retailprice) = l and r - l > 600
+  )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- add null check test from sq_notin.q once HIVE-7721 resolved.
+
+-- non agg, corr, having
+select b.p_mfgr, min(p_retailprice) 
+from part b 
+group by b.p_mfgr
+having b.p_mfgr not in 
+  (select p_mfgr 
+  from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a 
+  where min(p_retailprice) = l and r - l > 600
+  )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	1173.15
+Manufacturer#2	1690.68
+Warning: Shuffle Join JOIN[39][tables = [$hdt$_602, $hdt$_608]] in Stage 'Stage-3:MAPRED' is a cross product
+PREHOOK: query: -- agg, non corr, having
+select b.p_mfgr, min(p_retailprice) 
+from part b 
+group by b.p_mfgr
+having b.p_mfgr not in 
+  (select p_mfgr 
+  from part a
+  group by p_mfgr
+  having max(p_retailprice) - min(p_retailprice) > 600
+  )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: -- agg, non corr, having
+select b.p_mfgr, min(p_retailprice) 
+from part b 
+group by b.p_mfgr
+having b.p_mfgr not in 
+  (select p_mfgr 
+  from part a
+  group by p_mfgr
+  having max(p_retailprice) - min(p_retailprice) > 600
+  )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+Manufacturer#1	1173.15
+Manufacturer#2	1690.68
+PREHOOK: query: -- 17. SubQueries In
+-- non agg, non corr
+select * 
+from src_cbo 
+where src_cbo.key in (select key from src_cbo s1 where s1.key > '9')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+POSTHOOK: query: -- 17. SubQueries In
+-- non agg, non corr
+select * 
+from src_cbo 
+where src_cbo.key in (select key from src_cbo s1 where s1.key > '9')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+90	val_90
+90	val_90
+90	val_90
+92	val_92
+95	val_95
+95	val_95
+96	val_96
+97	val_97
+97	val_97
+98	val_98
+98	val_98
+PREHOOK: query: -- agg, corr
+-- add back once rank issue fixed for cbo
+
+-- distinct, corr
+select * 
+from src_cbo b 
+where b.key in
+        (select distinct a.key 
+         from src_cbo a 
+         where b.value = a.value and a.key > '9'
+        )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+POSTHOOK: query: -- agg, corr
+-- add back once rank issue fixed for cbo
+
+-- distinct, corr
+select * 
+from src_cbo b 
+where b.key in
+        (select distinct a.key 
+         from src_cbo a 
+         where b.value = a.value and a.key > '9'
+        )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+90	val_90
+90	val_90
+90	val_90
+92	val_92
+95	val_95
+95	val_95
+96	val_96
+97	val_97
+97	val_97
+98	val_98
+98	val_98
+PREHOOK: query: -- non agg, corr, with join in Parent Query
+select p.p_partkey, li.l_suppkey 
+from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey 
+where li.l_linenumber = 1 and
+ li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@lineitem
+#### A masked pattern was here ####
+POSTHOOK: query: -- non agg, corr, with join in Parent Query
+select p.p_partkey, li.l_suppkey 
+from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey 
+where li.l_linenumber = 1 and
+ li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@lineitem
+#### A masked pattern was here ####
+4297	1798
+108570	8571
+PREHOOK: query: -- where and having
+-- Plan is:
+-- Stage 1: b semijoin sq1:src_cbo (subquery in where)
+-- Stage 2: group by Stage 1 o/p
+-- Stage 5: group by on sq2:src_cbo (subquery in having)
+-- Stage 6: Stage 2 o/p semijoin Stage 5
+explain
+select key, value, count(*) 
+from src_cbo b
+where b.key in (select key from src_cbo where src_cbo.key > '8')
+group by key, value
+having count(*) in (select count(*) from src_cbo s1 where s1.key > '9' group by s1.key )
+PREHOOK: type: QUERY
+POSTHOOK: query: -- where and having
+-- Plan is:
+-- Stage 1: b semijoin sq1:src_cbo (subquery in where)
+-- Stage 2: group by Stage 1 o/p
+-- Stage 5: group by on sq2:src_cbo (subquery in having)
+-- Stage 6: Stage 2 o/p semijoin Stage 5
+explain
+select key, value, count(*) 
+from src_cbo b
+where b.key in (select key from src_cbo where src_cbo.key > '8')
+group by key, value
+having count(*) in (select count(*) from src_cbo s1 where s1.key > '9' group by s1.key )
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2, Stage-4
+  Stage-4 is a root stage
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: b
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col1 (type: string)
+          TableScan
+            alias: src_cbo
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: ((key > '8') and key is not null) (type: boolean)
+              Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  keys: _col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Semi Join 0 to 1
+          condition expressions:
+            0 {KEY.reducesinkkey0} {VALUE._col0}
+            1 
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1202 Data size: 213956 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 1202 Data size: 213956 Basic stats: COMPLETE Column stats: COMPLETE
+            Group By Operator
+              aggregations: count()
+              keys: _col0 (type: string), _col1 (type: string)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1202 Data size: 223572 Basic stats: COMPLETE Column stats: COMPLETE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 1202 Data size: 223572 Basic stats: COMPLETE Column stats: COMPLETE
+              value expressions: _col2 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE
+          Filter Operator
+            predicate: _col2 is not null (type: boolean)
+            Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col2 (type: bigint)
+              sort order: +
+              Map-reduce partition columns: _col2 (type: bigint)
+              Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE
+              value expressions: _col0 (type: string), _col1 (type: string)
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: bigint)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: bigint)
+              Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Semi Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1} {KEY.reducesinkkey0}
+            1 
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 601 Data size: 114791 Basic stats: COMPLETE Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: s1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: (key > '9') (type: boolean)
+              Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 166 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 166 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 83 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE
+          Filter Operator
+            predicate: _col1 is not null (type: boolean)
+            Statistics: Num rows: 83 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: _col1 (type: bigint)
+              outputColumnNames: _col0
+              Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                keys: _col0 (type: bigint)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- non agg, non corr, windowing
+explain
+select p_mfgr, p_name, avg(p_size) 
+from part 
+group by p_mfgr, p_name
+having p_name in 
+  (select first_value(p_name) over(partition by p_mfgr order by p_size) from part)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- non agg, non corr, windowing
+explain
+select p_mfgr, p_name, avg(p_size) 
+from part 
+group by p_mfgr, p_name
+having p_name in 
+  (select first_value(p_name) over(partition by p_mfgr order by p_size) from part)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1, Stage-3
+  Stage-3 is a root stage
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: part
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+              Reduce Output Operator
+                key expressions: _col1 (type: string), _col2 (type: int)
+                sort order: ++
+                Map-reduce partition columns: _col1 (type: string)
+                Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+                value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+      Reduce Operator Tree:
+        Extract
+          Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+          PTF Operator
+            Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: _wcol0 is not null (type: boolean)
+              Statistics: Num rows: 13 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+              Select Operator
+                expressions: _wcol0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 13 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                Group By Operator
+                  keys: _col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 13 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 13 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+          TableScan
+            Reduce Output Operator
+              key expressions: _col1 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col1 (type: string)
+              Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+              value expressions: _col0 (type: string), _col2 (type: double)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Semi Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1}
+            1 
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 14 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 14 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 14 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: part
+            Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: p_name is not null (type: boolean)
+              Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: avg(_col2)
+                  keys: _col0 (type: string), _col1 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                    Statistics: Num rows: 26 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+                    value expressions: _col2 (type: struct<count:bigint,sum:double,input:int>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: avg(VALUE._col0)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- 18. SubQueries Not Exists
+-- distinct, corr
+select * 
+from src_cbo b 
+where not exists 
+  (select distinct a.key 
+  from src_cbo a 
+  where b.value = a.value and a.value > 'val_2'
+  )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+POSTHOOK: query: -- 18. SubQueries Not Exists
+-- distinct, corr
+select * 
+from src_cbo b 
+where not exists 
+  (select distinct a.key 
+  from src_cbo a 
+  where b.value = a.value and a.value > 'val_2'
+  )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+0	val_0
+0	val_0
+0	val_0
+10	val_10
+100	val_100
+100	val_100
+103	val_103
+103	val_103
+104	val_104
+104	val_104
+105	val_105
+11	val_11
+111	val_111
+113	val_113
+113	val_113
+114	val_114
+116	val_116
+118	val_118
+118	val_118
+119	val_119
+119	val_119
+119	val_119
+12	val_12
+12	val_12
+120	val_120
+120	val_120
+125	val_125
+125	val_125
+126	val_126
+128	val_128
+128	val_128
+128	val_128
+129	val_129
+129	val_129
+131	val_131
+133	val_133
+134	val_134
+134	val_134
+136	val_136
+137	val_137
+137	val_137
+138	val_138
+138	val_138
+138	val_138
+138	val_138
+143	val_143
+145	val_145
+146	val_146
+146	val_146
+149	val_149
+149	val_149
+15	val_15
+15	val_15
+150	val_150
+152	val_152
+152	val_152
+153	val_153
+155	val_155
+156	val_156
+157	val_157
+158	val_158
+160	val_160
+162	val_162
+163	val_163
+164	val_164
+164	val_164
+165	val_165
+165	val_165
+166	val_166
+167	val_167
+167	val_167
+167	val_167
+168	val_168
+169	val_169
+169	val_169
+169	val_169
+169	val_169
+17	val_17
+170	val_170
+172	val_172
+172	val_172
+174	val_174
+174	val_174
+175	val_175
+175	val_175
+176	val_176
+176	val_176
+177	val_177
+178	val_178
+179	val_179
+179	val_179
+18	val_18
+18	val_18
+180	val_180
+181	val_181
+183	val_183
+186	val_186
+187	val_187
+187	val_187
+187	val_187
+189	val_189
+19	val_19
+190	val_190
+191	val_191
+191	val_191
+192	val_192
+193	val_193
+193	val_193
+193	val_193
+194	val_194
+195	val_195
+195	val_195
+196	val_196
+197	val_197
+197	val_197
+199	val_199
+199	val_199
+199	val_199
+2	val_2
+PREHOOK: query: -- no agg, corr, having
+select * 
+from src_cbo b 
+group by key, value
+having not exists 
+  (select a.key 
+  from src_cbo a 
+  where b.value = a.value  and a.key = b.key and a.value > 'val_12'
+  )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+POSTHOOK: query: -- no agg, corr, having
+select * 
+from src_cbo b 
+group by key, value
+having not exists 
+  (select a.key 
+  from src_cbo a 
+  where b.value = a.value  and a.key = b.key and a.value > 'val_12'
+  )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+0	val_0
+10	val_10
+100	val_100
+103	val_103
+104	val_104
+105	val_105
+11	val_11
+111	val_111
+113	val_113
+114	val_114
+116	val_116
+118	val_118
+119	val_119
+12	val_12
+PREHOOK: query: -- 19. SubQueries Exists
+-- view test
+create view cv1 as 
+select * 
+from src_cbo b 
+where exists
+  (select a.key 
+  from src_cbo a 
+  where b.value = a.value  and a.key = b.key and a.value > 'val_9')
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@src_cbo
+POSTHOOK: query: -- 19. SubQueries Exists
+-- view test
+create view cv1 as 
+select * 
+from src_cbo b 
+where exists
+  (select a.key 
+  from src_cbo a 
+  where b.value = a.value  and a.key = b.key and a.value > 'val_9')
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@src_cbo
+POSTHOOK: Output: default@cv1
+PREHOOK: query: select * from cv1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cv1
+PREHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+POSTHOOK: query: select * from cv1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cv1
+POSTHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+90	val_90
+90	val_90
+90	val_90
+92	val_92
+95	val_95
+95	val_95
+96	val_96
+97	val_97
+97	val_97
+98	val_98
+98	val_98
+PREHOOK: query: -- sq in from
+select * 
+from (select * 
+      from src_cbo b 
+      where exists 
+          (select a.key 
+          from src_cbo a 
+          where b.value = a.value  and a.key = b.key and a.value > 'val_9')
+     ) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+POSTHOOK: query: -- sq in from
+select * 
+from (select * 
+      from src_cbo b 
+      where exists 
+          (select a.key 
+          from src_cbo a 
+          where b.value = a.value  and a.key = b.key and a.value > 'val_9')
+     ) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+90	val_90
+90	val_90
+90	val_90
+92	val_92
+95	val_95
+95	val_95
+96	val_96
+97	val_97
+97	val_97
+98	val_98
+98	val_98
+PREHOOK: query: -- sq in from, having
+select *
+from (select b.key, count(*) 
+  from src_cbo b 
+  group by b.key
+  having exists 
+    (select a.key 
+    from src_cbo a 
+    where a.key = b.key and a.value > 'val_9'
+    )
+) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+POSTHOOK: query: -- sq in from, having
+select *
+from (select b.key, count(*) 
+  from src_cbo b 
+  group by b.key
+  having exists 
+    (select a.key 
+    from src_cbo a 
+    where a.key = b.key and a.value > 'val_9'
+    )
+) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_cbo
+#### A masked pattern was here ####
+90	3
+92	1
+95	2
+96	1
+97	2
+98	2