Posted to commits@hive.apache.org by kg...@apache.org on 2018/07/12 07:09:02 UTC

[1/3] hive git commit: HIVE-20069: Fix reoptimization in case of DPP and Semijoin optimization (Zoltan Haindrich reviewed by Ashutosh Chauhan, Zoltan Haindrich)

Repository: hive
Updated Branches:
  refs/heads/master 6ef4a990d -> 5ade74060


HIVE-20069: Fix reoptimization in case of DPP and Semijoin optimization (Zoltan Haindrich reviewed by Ashutosh Chauhan, Zoltan Haindrich)

Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cac971b4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cac971b4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cac971b4

Branch: refs/heads/master
Commit: cac971b43a05a161c2be6ea220dbdf1317b07017
Parents: 6ef4a99
Author: Zoltan Haindrich <ki...@rxd.hu>
Authored: Wed Jul 11 14:48:25 2018 +0200
Committer: Zoltan Haindrich <ki...@rxd.hu>
Committed: Thu Jul 12 08:50:28 2018 +0200

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   2 +
 .../hadoop/hive/ql/parse/TezCompiler.java       |  72 ++++
 .../hive/ql/plan/mapper/StatsSources.java       |   9 +-
 .../hadoop/hive/ql/stats/OperatorStats.java     |   7 +
 ql/src/test/queries/clientpositive/reopt_dpp.q  |  62 +++
 .../queries/clientpositive/reopt_semijoin.q     |  76 ++++
 .../results/clientpositive/llap/reopt_dpp.q.out | 259 ++++++++++++
 .../clientpositive/llap/reopt_semijoin.q.out    | 420 +++++++++++++++++++
 8 files changed, 905 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/cac971b4/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 72dd144..4001b9f 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -456,6 +456,8 @@ minillaplocal.query.files=\
   bucketmapjoin6.q,\
   bucketmapjoin7.q,\
   bucketpruning1.q,\
+  reopt_dpp.q,\
+  reopt_semijoin.q,\
   retry_failure.q,\
   retry_failure_stat_changes.q,\
   retry_failure_oom.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/cac971b4/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
index dfd7908..119aa92 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
@@ -93,6 +93,7 @@ import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger;
 import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
 import org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.AppMasterEventDesc;
 import org.apache.hadoop.hive.ql.plan.BaseWork;
 import org.apache.hadoop.hive.ql.plan.ColStatistics;
 import org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc;
@@ -105,8 +106,10 @@ import org.apache.hadoop.hive.ql.plan.MoveWork;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.Statistics;
 import org.apache.hadoop.hive.ql.plan.TezWork;
+import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
+import org.apache.hadoop.hive.ql.stats.OperatorStats;
 import org.apache.hadoop.hive.ql.stats.StatsUtils;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator;
 import org.slf4j.Logger;
@@ -211,6 +214,10 @@ public class TezCompiler extends TaskCompiler {
     }
     perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Shared scans optimization");
 
+    perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+    markOperatorsWithUnstableRuntimeStats(procCtx);
+    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "markOperatorsWithUnstableRuntimeStats");
+
     // need a new run of the constant folding because we might have created lots
     // of "and true and true" conditions.
     // Rather than run the full constant folding just need to shortcut AND/OR expressions
@@ -1006,6 +1013,71 @@ public class TezCompiler extends TaskCompiler {
     ogw.startWalking(topNodes, null);
   }
 
+  private static class MarkRuntimeStatsAsIncorrect implements NodeProcessor {
+
+    private PlanMapper planMapper;
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
+        throws SemanticException {
+      ParseContext pCtx = ((OptimizeTezProcContext) procCtx).parseContext;
+      planMapper = pCtx.getContext().getPlanMapper();
+      if (nd instanceof ReduceSinkOperator) {
+        ReduceSinkOperator rs = (ReduceSinkOperator) nd;
+        SemiJoinBranchInfo sjInfo = pCtx.getRsToSemiJoinBranchInfo().get(rs);
+        if (sjInfo == null) {
+          return null;
+        }
+        walkSubtree(sjInfo.getTsOp());
+      }
+      if (nd instanceof AppMasterEventOperator) {
+        AppMasterEventOperator ame = (AppMasterEventOperator) nd;
+        AppMasterEventDesc c = ame.getConf();
+        if (c instanceof DynamicPruningEventDesc) {
+          DynamicPruningEventDesc dped = (DynamicPruningEventDesc) c;
+          mark(dped.getTableScan());
+        }
+      }
+      return null;
+    }
+
+    private void walkSubtree(Operator<?> root) {
+      Deque<Operator<?>> deque = new LinkedList<>();
+      deque.add(root);
+      while (!deque.isEmpty()) {
+        Operator<?> op = deque.pollLast();
+        mark(op);
+        if (op instanceof ReduceSinkOperator) {
+          // Done with this branch
+        } else {
+          deque.addAll(op.getChildOperators());
+        }
+      }
+    }
+
+    private void mark(Operator<?> op) {
+      planMapper.link(op, new OperatorStats.IncorrectRuntimeStatsMarker());
+    }
+
+  }
+
+  private void markOperatorsWithUnstableRuntimeStats(OptimizeTezProcContext procCtx) throws SemanticException {
+    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+    opRules.put(
+        new RuleRegExp("R1",
+            ReduceSinkOperator.getOperatorName() + "%"),
+        new MarkRuntimeStatsAsIncorrect());
+    opRules.put(
+        new RuleRegExp("R2",
+            AppMasterEventOperator.getOperatorName() + "%"),
+        new MarkRuntimeStatsAsIncorrect());
+    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
+    List<Node> topNodes = new ArrayList<Node>();
+    topNodes.addAll(procCtx.parseContext.getTopOps().values());
+    GraphWalker ogw = new PreOrderOnceWalker(disp);
+    ogw.startWalking(topNodes, null);
+  }
+
   private boolean findParallelSemiJoinBranch(Operator<?> mapjoin, TableScanOperator bigTableTS,
                                              ParseContext parseContext,
                                              Map<ReduceSinkOperator, TableScanOperator> semijoins) {

http://git-wip-us.apache.org/repos/asf/hive/blob/cac971b4/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/StatsSources.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/StatsSources.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/StatsSources.java
index 5a62046..823cb87 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/StatsSources.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/StatsSources.java
@@ -95,9 +95,14 @@ public class StatsSources {
         }
         LOG.debug(sb.toString());
       }
-      if (stat.size() >= 1 && sig.size() >= 1) {
-        map.put(sig.get(0), stat.get(0));
+      if (stat.size() < 1 || sig.size() < 1) {
+        continue;
       }
+      if (e.getAll(OperatorStats.IncorrectRuntimeStatsMarker.class).size() > 0) {
+        LOG.debug("Ignoring {}, marked with OperatorStats.IncorrectRuntimeStatsMarker", sig.get(0));
+        continue;
+      }
+      map.put(sig.get(0), stat.get(0));
     }
     return map.build();
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/cac971b4/ql/src/java/org/apache/hadoop/hive/ql/stats/OperatorStats.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/OperatorStats.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/OperatorStats.java
index d70bb82..0c56c82 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/OperatorStats.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/OperatorStats.java
@@ -23,6 +23,11 @@ import com.google.common.base.Objects;
  * Holds information about an operator's statistics.
  */
 public final class OperatorStats {
+
+  /** Marker class to help with plan elements which will collect invalid statistics */
+  public static class IncorrectRuntimeStatsMarker {
+  }
+
   private String operatorId;
   private long outputRecords;
 
@@ -67,4 +72,6 @@ public final class OperatorStats {
     return Objects.equal(operatorId, o.operatorId) &&
         Objects.equal(outputRecords, o.outputRecords);
   }
+
+
 }
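
Taken together, the three Java changes above implement a mark-and-skip scheme: the new
markOperatorsWithUnstableRuntimeStats pass in TezCompiler links an
OperatorStats.IncorrectRuntimeStatsMarker to the table scan targeted by a dynamic partition
pruning event or a semijoin-reduction branch (and, for semijoin, to the operators beneath that
scan up to the next ReduceSink), because the runtime row counts collected there reflect a pruned
input; StatsSources then skips any marked entry when it builds the stats map used for
reoptimization. What follows is a minimal, self-contained sketch of that pattern; it uses plain
collections and hypothetical operator ids ("TS_0", "TS_3") as stand-ins for Hive's PlanMapper and
operator tree, so it illustrates the idea rather than the actual implementation.

    import java.util.ArrayList;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;

    public class MarkAndSkipSketch {
      // Stand-in for OperatorStats.IncorrectRuntimeStatsMarker.
      static class IncorrectRuntimeStatsMarker { }

      public static void main(String[] args) {
        // Stand-in for PlanMapper: operator id -> everything linked to that operator.
        Map<String, List<Object>> planMapper = new LinkedHashMap<>();

        List<Object> bigScan = new ArrayList<>();
        bigScan.add(1L);                                 // runtime rows seen by the DPP-pruned scan
        bigScan.add(new IncorrectRuntimeStatsMarker());  // marker linked by the new TezCompiler walker
        planMapper.put("TS_0", bigScan);

        List<Object> dimScan = new ArrayList<>();
        dimScan.add(2L);                                 // runtime rows of the unpruned dimension scan
        planMapper.put("TS_3", dimScan);

        // What the StatsSources change does: skip every entry that carries the marker.
        Map<String, Long> usableStats = new LinkedHashMap<>();
        for (Map.Entry<String, List<Object>> e : planMapper.entrySet()) {
          boolean marked = false;
          Long rows = null;
          for (Object o : e.getValue()) {
            if (o instanceof IncorrectRuntimeStatsMarker) {
              marked = true;
            } else if (o instanceof Long) {
              rows = (Long) o;
            }
          }
          if (marked || rows == null) {
            continue;  // unreliable: reusing this count would mislead the reoptimized plan
          }
          usableStats.put(e.getKey(), rows);
        }
        System.out.println(usableStats);  // prints {TS_3=2}
      }
    }

This mirrors the expectation spelled out in reopt_dpp.q below: the table scan of s must not be
re-planned with the 1-row count it produced under pruning (it should stay at 2 or 123456 rows),
while runtime counts of the unaffected operators remain usable.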

http://git-wip-us.apache.org/repos/asf/hive/blob/cac971b4/ql/src/test/queries/clientpositive/reopt_dpp.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/reopt_dpp.q b/ql/src/test/queries/clientpositive/reopt_dpp.q
new file mode 100644
index 0000000..952dcbe
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/reopt_dpp.q
@@ -0,0 +1,62 @@
+set hive.explain.user=true;
+set hive.optimize.index.filter=true;
+set hive.auto.convert.join=true;
+set hive.vectorized.execution.enabled=true;
+
+drop table if exists x1_store_sales;
+drop table if exists x1_date_dim;
+drop table if exists x1_item;
+
+create table x1_store_sales 
+(
+	ss_item_sk	int
+)
+partitioned by (ss_sold_date_sk int)
+stored as orc;
+
+create table x1_date_dim
+(
+	d_date_sk	int,
+	d_month_seq	int,
+	d_year		int,
+	d_moy		int
+)
+stored as orc;
+
+
+insert into x1_date_dim values	(1,1,2000,2),
+				(2,2,2001,2);
+insert into x1_store_sales partition (ss_sold_date_sk=1) values (1);
+insert into x1_store_sales partition (ss_sold_date_sk=2) values (2);
+
+alter table x1_store_sales partition (ss_sold_date_sk=1) update statistics set(
+'numRows'='123456',
+'rawDataSize'='1234567');
+
+alter table x1_date_dim update statistics set(
+'numRows'='56',
+'rawDataSize'='81449');
+
+
+-- the following query is designed to produce a DPP plan
+explain 
+select   count(*) cnt
+ from
+     x1_store_sales s
+     ,x1_date_dim d
+ where  
+	1=1
+	and s.ss_sold_date_sk = d.d_date_sk
+	and d.d_year=2000;
+
+-- tablescan of s should be 2 or 123456 rows; but never 1
+-- and it should not be a mapjoin :)
+explain reoptimization
+select   count(*) cnt
+ from
+     x1_store_sales s
+     ,x1_date_dim d
+ where  
+	1=1
+	and s.ss_sold_date_sk = d.d_date_sk
+	and d.d_year=2000;

http://git-wip-us.apache.org/repos/asf/hive/blob/cac971b4/ql/src/test/queries/clientpositive/reopt_semijoin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/reopt_semijoin.q b/ql/src/test/queries/clientpositive/reopt_semijoin.q
new file mode 100644
index 0000000..0eacb8a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/reopt_semijoin.q
@@ -0,0 +1,76 @@
+set hive.explain.user=true;
+set hive.optimize.index.filter=true;
+set hive.auto.convert.join=true;
+set hive.vectorized.execution.enabled=true;
+
+drop table if exists x1_store_sales;
+drop table if exists x1_date_dim;
+
+create table x1_store_sales 
+(
+	ss_sold_date_sk int,
+	ss_item_sk	int
+)
+stored as orc;
+
+create table x1_date_dim
+(
+	d_date_sk	int,
+	d_month_seq	int,
+	d_year		int,
+	d_moy		int
+)
+stored as orc;
+
+insert into x1_date_dim values	(1,1,2000,1),
+				(2,2,2001,2),
+				(3,2,2001,3),
+				(4,2,2001,4),
+				(5,2,2001,5),
+				(6,2,2001,6),
+				(7,2,2001,7),
+				(8,2,2001,8);
+
+insert into x1_store_sales values (1,1),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10),(11,11);
+
+alter table x1_store_sales update statistics set(
+'numRows'='123456',
+'rawDataSize'='1234567');
+
+alter table x1_date_dim update statistics set(
+'numRows'='56',
+'rawDataSize'='81449');
+
+
+set hive.auto.convert.join.noconditionaltask.size=1;
+set hive.tez.dynamic.partition.pruning=true;
+set hive.tez.dynamic.semijoin.reduction=true;
+set hive.optimize.index.filter=true;
+set hive.tez.bigtable.minsize.semijoin.reduction=1;
+set hive.tez.min.bloom.filter.entries=1;
+set hive.tez.bloom.filter.factor=1.0f;
+set hive.explain.user=false;
+
+explain 
+select   sum(s.ss_item_sk)
+ from
+     x1_store_sales s
+     ,x1_date_dim d
+ where
+        1=1
+        and s.ss_sold_date_sk=d.d_date_sk
+	and d.d_moy=3
+;
+
+explain reoptimization
+select   sum(s.ss_item_sk)
+ from
+     x1_store_sales s
+     ,x1_date_dim d
+ where
+        1=1
+        and s.ss_sold_date_sk=d.d_date_sk
+	and d.d_moy=3
+;
+
+

http://git-wip-us.apache.org/repos/asf/hive/blob/cac971b4/ql/src/test/results/clientpositive/llap/reopt_dpp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/reopt_dpp.q.out b/ql/src/test/results/clientpositive/llap/reopt_dpp.q.out
new file mode 100644
index 0000000..31726f6
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/reopt_dpp.q.out
@@ -0,0 +1,259 @@
+PREHOOK: query: drop table if exists x1_store_sales
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists x1_store_sales
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists x1_date_dim
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists x1_date_dim
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists x1_item
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists x1_item
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table x1_store_sales 
+(
+	ss_item_sk	int
+)
+partitioned by (ss_sold_date_sk int)
+stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@x1_store_sales
+POSTHOOK: query: create table x1_store_sales 
+(
+	ss_item_sk	int
+)
+partitioned by (ss_sold_date_sk int)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@x1_store_sales
+PREHOOK: query: create table x1_date_dim
+(
+	d_date_sk	int,
+	d_month_seq	int,
+	d_year		int,
+	d_moy		int
+)
+stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@x1_date_dim
+POSTHOOK: query: create table x1_date_dim
+(
+	d_date_sk	int,
+	d_month_seq	int,
+	d_year		int,
+	d_moy		int
+)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@x1_date_dim
+PREHOOK: query: insert into x1_date_dim values	(1,1,2000,2),
+				(2,2,2001,2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@x1_date_dim
+POSTHOOK: query: insert into x1_date_dim values	(1,1,2000,2),
+				(2,2,2001,2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@x1_date_dim
+POSTHOOK: Lineage: x1_date_dim.d_date_sk SCRIPT []
+POSTHOOK: Lineage: x1_date_dim.d_month_seq SCRIPT []
+POSTHOOK: Lineage: x1_date_dim.d_moy SCRIPT []
+POSTHOOK: Lineage: x1_date_dim.d_year SCRIPT []
+PREHOOK: query: insert into x1_store_sales partition (ss_sold_date_sk=1) values (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@x1_store_sales@ss_sold_date_sk=1
+POSTHOOK: query: insert into x1_store_sales partition (ss_sold_date_sk=1) values (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@x1_store_sales@ss_sold_date_sk=1
+POSTHOOK: Lineage: x1_store_sales PARTITION(ss_sold_date_sk=1).ss_item_sk SCRIPT []
+PREHOOK: query: insert into x1_store_sales partition (ss_sold_date_sk=2) values (2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@x1_store_sales@ss_sold_date_sk=2
+POSTHOOK: query: insert into x1_store_sales partition (ss_sold_date_sk=2) values (2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@x1_store_sales@ss_sold_date_sk=2
+POSTHOOK: Lineage: x1_store_sales PARTITION(ss_sold_date_sk=2).ss_item_sk SCRIPT []
+PREHOOK: query: alter table x1_store_sales partition (ss_sold_date_sk=1) update statistics set(
+'numRows'='123456',
+'rawDataSize'='1234567')
+PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS
+PREHOOK: Input: default@x1_store_sales
+PREHOOK: Output: default@x1_store_sales@ss_sold_date_sk=1
+POSTHOOK: query: alter table x1_store_sales partition (ss_sold_date_sk=1) update statistics set(
+'numRows'='123456',
+'rawDataSize'='1234567')
+POSTHOOK: type: ALTERTABLE_UPDATEPARTSTATS
+POSTHOOK: Input: default@x1_store_sales
+POSTHOOK: Input: default@x1_store_sales@ss_sold_date_sk=1
+POSTHOOK: Output: default@x1_store_sales@ss_sold_date_sk=1
+PREHOOK: query: alter table x1_date_dim update statistics set(
+'numRows'='56',
+'rawDataSize'='81449')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@x1_date_dim
+PREHOOK: Output: default@x1_date_dim
+POSTHOOK: query: alter table x1_date_dim update statistics set(
+'numRows'='56',
+'rawDataSize'='81449')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@x1_date_dim
+POSTHOOK: Output: default@x1_date_dim
+PREHOOK: query: explain 
+select   count(*) cnt
+ from
+     x1_store_sales s
+     ,x1_date_dim d
+ where  
+	1=1
+	and s.ss_sold_date_sk = d.d_date_sk
+	and d.d_year=2000
+PREHOOK: type: QUERY
+POSTHOOK: query: explain 
+select   count(*) cnt
+ from
+     x1_store_sales s
+     ,x1_date_dim d
+ where  
+	1=1
+	and s.ss_sold_date_sk = d.d_date_sk
+	and d.d_year=2000
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Map 1 <- Map 3 (BROADCAST_EDGE)
+Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized, llap
+      File Output Operator [FS_35]
+        Group By Operator [GBY_34] (rows=1 width=8)
+          Output:["_col0"],aggregations:["count(VALUE._col0)"]
+        <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap
+          PARTITION_ONLY_SHUFFLE [RS_33]
+            Group By Operator [GBY_32] (rows=1 width=8)
+              Output:["_col0"],aggregations:["count()"]
+              Map Join Operator [MAPJOIN_31] (rows=1728398 width=8)
+                Conds:SEL_30._col0=RS_26._col0(Inner)
+              <-Map 3 [BROADCAST_EDGE] vectorized, llap
+                BROADCAST [RS_26]
+                  PartitionCols:_col0
+                  Select Operator [SEL_25] (rows=28 width=8)
+                    Output:["_col0"]
+                    Filter Operator [FIL_24] (rows=28 width=8)
+                      predicate:((d_year = 2000) and d_date_sk is not null)
+                      TableScan [TS_3] (rows=56 width=8)
+                        default@x1_date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"]
+                Dynamic Partitioning Event Operator [EVENT_29] (rows=1 width=8)
+                  Group By Operator [GBY_28] (rows=1 width=8)
+                    Output:["_col0"],keys:_col0
+                    Select Operator [SEL_27] (rows=28 width=8)
+                      Output:["_col0"]
+                       Please refer to the previous Select Operator [SEL_25]
+              <-Select Operator [SEL_30] (rows=123457 width=4)
+                  Output:["_col0"]
+                  TableScan [TS_0] (rows=123457 width=14)
+                    default@x1_store_sales,s,Tbl:COMPLETE,Col:COMPLETE
+
+PREHOOK: query: explain reoptimization
+select   count(*) cnt
+ from
+     x1_store_sales s
+     ,x1_date_dim d
+ where  
+	1=1
+	and s.ss_sold_date_sk = d.d_date_sk
+	and d.d_year=2000
+PREHOOK: type: QUERY
+PREHOOK: Input: default@x1_date_dim
+PREHOOK: Input: default@x1_store_sales
+PREHOOK: Input: default@x1_store_sales@ss_sold_date_sk=1
+PREHOOK: Input: default@x1_store_sales@ss_sold_date_sk=2
+#### A masked pattern was here ####
+POSTHOOK: query: explain reoptimization
+select   count(*) cnt
+ from
+     x1_store_sales s
+     ,x1_date_dim d
+ where  
+	1=1
+	and s.ss_sold_date_sk = d.d_date_sk
+	and d.d_year=2000
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@x1_date_dim
+POSTHOOK: Input: default@x1_store_sales
+POSTHOOK: Input: default@x1_store_sales@ss_sold_date_sk=1
+POSTHOOK: Input: default@x1_store_sales@ss_sold_date_sk=2
+#### A masked pattern was here ####
+PREHOOK: query: explain reoptimization
+select   count(*) cnt
+ from
+     x1_store_sales s
+     ,x1_date_dim d
+ where  
+	1=1
+	and s.ss_sold_date_sk = d.d_date_sk
+	and d.d_year=2000
+PREHOOK: type: QUERY
+POSTHOOK: query: explain reoptimization
+select   count(*) cnt
+ from
+     x1_store_sales s
+     ,x1_date_dim d
+ where  
+	1=1
+	and s.ss_sold_date_sk = d.d_date_sk
+	and d.d_year=2000
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Map 1 <- Map 3 (BROADCAST_EDGE)
+Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized, llap
+      File Output Operator [FS_35]
+        Group By Operator [GBY_34] (runtime: rows=1 width=8)
+          Output:["_col0"],aggregations:["count(VALUE._col0)"]
+        <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap
+          PARTITION_ONLY_SHUFFLE [RS_33]
+            Group By Operator [GBY_32] (runtime: rows=1 width=8)
+              Output:["_col0"],aggregations:["count()"]
+              Map Join Operator [MAPJOIN_31] (runtime: rows=1 width=8)
+                Conds:SEL_30._col0=RS_26._col0(Inner)
+              <-Map 3 [BROADCAST_EDGE] vectorized, llap
+                BROADCAST [RS_26]
+                  PartitionCols:_col0
+                  Select Operator [SEL_25] (runtime: rows=1 width=8)
+                    Output:["_col0"]
+                    Filter Operator [FIL_24] (runtime: rows=1 width=8)
+                      predicate:((d_year = 2000) and d_date_sk is not null)
+                      TableScan [TS_3] (runtime: rows=2 width=8)
+                        default@x1_date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"]
+                Dynamic Partitioning Event Operator [EVENT_29] (runtime: rows=1 width=8)
+                  Group By Operator [GBY_28] (runtime: rows=1 width=8)
+                    Output:["_col0"],keys:_col0
+                    Select Operator [SEL_27] (runtime: rows=1 width=8)
+                      Output:["_col0"]
+                       Please refer to the previous Select Operator [SEL_25]
+              <-Select Operator [SEL_30] (runtime: rows=1 width=4)
+                  Output:["_col0"]
+                  TableScan [TS_0] (rows=123457 width=14)
+                    default@x1_store_sales,s,Tbl:COMPLETE,Col:COMPLETE
+

http://git-wip-us.apache.org/repos/asf/hive/blob/cac971b4/ql/src/test/results/clientpositive/llap/reopt_semijoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/reopt_semijoin.q.out b/ql/src/test/results/clientpositive/llap/reopt_semijoin.q.out
new file mode 100644
index 0000000..e60b207
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/reopt_semijoin.q.out
@@ -0,0 +1,420 @@
+PREHOOK: query: drop table if exists x1_store_sales
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists x1_store_sales
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists x1_date_dim
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists x1_date_dim
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table x1_store_sales 
+(
+	ss_sold_date_sk int,
+	ss_item_sk	int
+)
+stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@x1_store_sales
+POSTHOOK: query: create table x1_store_sales 
+(
+	ss_sold_date_sk int,
+	ss_item_sk	int
+)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@x1_store_sales
+PREHOOK: query: create table x1_date_dim
+(
+	d_date_sk	int,
+	d_month_seq	int,
+	d_year		int,
+	d_moy		int
+)
+stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@x1_date_dim
+POSTHOOK: query: create table x1_date_dim
+(
+	d_date_sk	int,
+	d_month_seq	int,
+	d_year		int,
+	d_moy		int
+)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@x1_date_dim
+PREHOOK: query: insert into x1_date_dim values	(1,1,2000,1),
+				(2,2,2001,2),
+				(3,2,2001,3),
+				(4,2,2001,4),
+				(5,2,2001,5),
+				(6,2,2001,6),
+				(7,2,2001,7),
+				(8,2,2001,8)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@x1_date_dim
+POSTHOOK: query: insert into x1_date_dim values	(1,1,2000,1),
+				(2,2,2001,2),
+				(3,2,2001,3),
+				(4,2,2001,4),
+				(5,2,2001,5),
+				(6,2,2001,6),
+				(7,2,2001,7),
+				(8,2,2001,8)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@x1_date_dim
+POSTHOOK: Lineage: x1_date_dim.d_date_sk SCRIPT []
+POSTHOOK: Lineage: x1_date_dim.d_month_seq SCRIPT []
+POSTHOOK: Lineage: x1_date_dim.d_moy SCRIPT []
+POSTHOOK: Lineage: x1_date_dim.d_year SCRIPT []
+PREHOOK: query: insert into x1_store_sales values (1,1),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10),(11,11)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@x1_store_sales
+POSTHOOK: query: insert into x1_store_sales values (1,1),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10),(11,11)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@x1_store_sales
+POSTHOOK: Lineage: x1_store_sales.ss_item_sk SCRIPT []
+POSTHOOK: Lineage: x1_store_sales.ss_sold_date_sk SCRIPT []
+PREHOOK: query: alter table x1_store_sales update statistics set(
+'numRows'='123456',
+'rawDataSize'='1234567')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@x1_store_sales
+PREHOOK: Output: default@x1_store_sales
+POSTHOOK: query: alter table x1_store_sales update statistics set(
+'numRows'='123456',
+'rawDataSize'='1234567')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@x1_store_sales
+POSTHOOK: Output: default@x1_store_sales
+PREHOOK: query: alter table x1_date_dim update statistics set(
+'numRows'='56',
+'rawDataSize'='81449')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@x1_date_dim
+PREHOOK: Output: default@x1_date_dim
+POSTHOOK: query: alter table x1_date_dim update statistics set(
+'numRows'='56',
+'rawDataSize'='81449')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@x1_date_dim
+POSTHOOK: Output: default@x1_date_dim
+PREHOOK: query: explain 
+select   sum(s.ss_item_sk)
+ from
+     x1_store_sales s
+     ,x1_date_dim d
+ where
+        1=1
+        and s.ss_sold_date_sk=d.d_date_sk
+	and d.d_moy=3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain 
+select   sum(s.ss_item_sk)
+ from
+     x1_store_sales s
+     ,x1_date_dim d
+ where
+        1=1
+        and s.ss_sold_date_sk=d.d_date_sk
+	and d.d_moy=3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Reducer 5 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: s
+                  filterExpr: (ss_sold_date_sk is not null and (ss_sold_date_sk BETWEEN DynamicValue(RS_7_d_d_date_sk_min) AND DynamicValue(RS_7_d_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_d_d_date_sk_bloom_filter)))) (type: boolean)
+                  Statistics: Num rows: 123456 Data size: 987648 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: ((ss_sold_date_sk BETWEEN DynamicValue(RS_7_d_d_date_sk_min) AND DynamicValue(RS_7_d_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_d_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) (type: boolean)
+                    Statistics: Num rows: 123456 Data size: 987648 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 123456 Data size: 987648 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 123456 Data size: 987648 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: d
+                  filterExpr: ((d_moy = 3) and d_date_sk is not null) (type: boolean)
+                  Statistics: Num rows: 56 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: ((d_moy = 3) and d_date_sk is not null) (type: boolean)
+                    Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: d_date_sk (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
+                      Select Operator
+                        expressions: _col0 (type: int)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
+                        Group By Operator
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                            value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col1
+                Statistics: Num rows: 86419 Data size: 345676 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: sum(_col1)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1)
+                mode: final
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain reoptimization
+select   sum(s.ss_item_sk)
+ from
+     x1_store_sales s
+     ,x1_date_dim d
+ where
+        1=1
+        and s.ss_sold_date_sk=d.d_date_sk
+	and d.d_moy=3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@x1_date_dim
+PREHOOK: Input: default@x1_store_sales
+#### A masked pattern was here ####
+POSTHOOK: query: explain reoptimization
+select   sum(s.ss_item_sk)
+ from
+     x1_store_sales s
+     ,x1_date_dim d
+ where
+        1=1
+        and s.ss_sold_date_sk=d.d_date_sk
+	and d.d_moy=3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@x1_date_dim
+POSTHOOK: Input: default@x1_store_sales
+#### A masked pattern was here ####
+PREHOOK: query: explain reoptimization
+select   sum(s.ss_item_sk)
+ from
+     x1_store_sales s
+     ,x1_date_dim d
+ where
+        1=1
+        and s.ss_sold_date_sk=d.d_date_sk
+	and d.d_moy=3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain reoptimization
+select   sum(s.ss_item_sk)
+ from
+     x1_store_sales s
+     ,x1_date_dim d
+ where
+        1=1
+        and s.ss_sold_date_sk=d.d_date_sk
+	and d.d_moy=3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Reducer 5 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: s
+                  filterExpr: (ss_sold_date_sk is not null and (ss_sold_date_sk BETWEEN DynamicValue(RS_7_d_d_date_sk_min) AND DynamicValue(RS_7_d_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_d_d_date_sk_bloom_filter)))) (type: boolean)
+                  Statistics: Num rows: 123456 Data size: 987648 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: ((ss_sold_date_sk BETWEEN DynamicValue(RS_7_d_d_date_sk_min) AND DynamicValue(RS_7_d_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_d_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) (type: boolean)
+                    Statistics: Num rows: 123456 Data size: 987648 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 123456 Data size: 987648 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 123456 Data size: 987648 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: int)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: d
+                  filterExpr: ((d_moy = 3) and d_date_sk is not null) (type: boolean)
+                  Statistics: (RUNTIME) Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: ((d_moy = 3) and d_date_sk is not null) (type: boolean)
+                    Statistics: (RUNTIME) Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: d_date_sk (type: int)
+                      outputColumnNames: _col0
+                      Statistics: (RUNTIME) Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: (RUNTIME) Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                      Select Operator
+                        expressions: _col0 (type: int)
+                        outputColumnNames: _col0
+                        Statistics: (RUNTIME) Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                        Group By Operator
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: (RUNTIME) Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: (RUNTIME) Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                            value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col1
+                Statistics: (RUNTIME) Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: sum(_col1)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: (RUNTIME) Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: (RUNTIME) Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: (RUNTIME) Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1)
+                mode: final
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: (RUNTIME) Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: (RUNTIME) Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+


[3/3] hive git commit: HIVE-20088: Beeline config location path is assembled incorrectly (Denes Bodo via Zoltan Haindrich)

Posted by kg...@apache.org.
HIVE-20088: Beeline config location path is assembled incorrectly (Denes Bodo via Zoltan Haindrich)

Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5ade7406
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5ade7406
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5ade7406

Branch: refs/heads/master
Commit: 5ade74060c9615a658d66fb0dac397671d2368ba
Parents: d118506
Author: Denes Bodo <bo...@gmail.com>
Authored: Thu Jul 12 09:01:17 2018 +0200
Committer: Zoltan Haindrich <ki...@rxd.hu>
Committed: Thu Jul 12 09:01:17 2018 +0200

----------------------------------------------------------------------
 .../hs2connection/BeelineSiteParser.java        |  2 +-
 .../UserHS2ConnectionFileParser.java            |  2 +-
 .../hs2connection/TestBeelineSiteParser.java    | 41 ++++++++++++++++++++
 .../TestUserHS2ConnectionFileParser.java        | 16 ++++++++
 4 files changed, 59 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/5ade7406/beeline/src/java/org/apache/hive/beeline/hs2connection/BeelineSiteParser.java
----------------------------------------------------------------------
diff --git a/beeline/src/java/org/apache/hive/beeline/hs2connection/BeelineSiteParser.java b/beeline/src/java/org/apache/hive/beeline/hs2connection/BeelineSiteParser.java
index 600d84e..4c55104 100644
--- a/beeline/src/java/org/apache/hive/beeline/hs2connection/BeelineSiteParser.java
+++ b/beeline/src/java/org/apache/hive/beeline/hs2connection/BeelineSiteParser.java
@@ -63,7 +63,7 @@ public class BeelineSiteParser implements HS2ConnectionFileParser {
       locations
           .add(System.getenv("HIVE_CONF_DIR") + File.separator + DEFAULT_BEELINE_SITE_FILE_NAME);
     }
-    locations.add(ETC_HIVE_CONF_LOCATION + DEFAULT_BEELINE_SITE_FILE_NAME);
+    locations.add(ETC_HIVE_CONF_LOCATION + File.separator + DEFAULT_BEELINE_SITE_FILE_NAME);
   }
 
   @VisibleForTesting

http://git-wip-us.apache.org/repos/asf/hive/blob/5ade7406/beeline/src/java/org/apache/hive/beeline/hs2connection/UserHS2ConnectionFileParser.java
----------------------------------------------------------------------
diff --git a/beeline/src/java/org/apache/hive/beeline/hs2connection/UserHS2ConnectionFileParser.java b/beeline/src/java/org/apache/hive/beeline/hs2connection/UserHS2ConnectionFileParser.java
index 9d45daf..47dee4c 100644
--- a/beeline/src/java/org/apache/hive/beeline/hs2connection/UserHS2ConnectionFileParser.java
+++ b/beeline/src/java/org/apache/hive/beeline/hs2connection/UserHS2ConnectionFileParser.java
@@ -56,7 +56,7 @@ public class UserHS2ConnectionFileParser implements HS2ConnectionFileParser {
       locations.add(
           System.getenv("HIVE_CONF_DIR") + File.separator + DEFAULT_CONNECTION_CONFIG_FILE_NAME);
     }
-    locations.add(ETC_HIVE_CONF_LOCATION + DEFAULT_CONNECTION_CONFIG_FILE_NAME);
+    locations.add(ETC_HIVE_CONF_LOCATION + File.separator + DEFAULT_CONNECTION_CONFIG_FILE_NAME);
   }
 
   @VisibleForTesting
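
Both parser fixes address the same defect: the /etc config directory constant was concatenated
with the default file name without a path separator, so the candidate location came out as one
mangled path segment and the file under /etc could not be found. A small stand-alone illustration
of the before/after concatenation follows; the literal values are assumptions for the example
(the real ones are the ETC_HIVE_CONF_LOCATION and default file-name constants in the two
parsers), only the shape of the bug matters here.

    import java.io.File;

    public class ConfigPathDemo {
      public static void main(String[] args) {
        // Assumed example values, not the actual constants.
        String etcHiveConfLocation =
            File.separator + "etc" + File.separator + "hive" + File.separator + "conf";
        String fileName = "beeline-site.xml";

        // Before HIVE-20088: no separator between directory and file name,
        // e.g. "/etc/hive/confbeeline-site.xml".
        String broken = etcHiveConfLocation + fileName;

        // After HIVE-20088: File.separator inserted, e.g. "/etc/hive/conf/beeline-site.xml".
        String fixed = etcHiveConfLocation + File.separator + fileName;

        System.out.println(broken);
        System.out.println(fixed);
      }
    }

The new tests below assert exactly the corrected form: the locations list must contain
ETC_HIVE_CONF_LOCATION + File.separator + the default file name.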

http://git-wip-us.apache.org/repos/asf/hive/blob/5ade7406/beeline/src/test/org/apache/hive/beeline/hs2connection/TestBeelineSiteParser.java
----------------------------------------------------------------------
diff --git a/beeline/src/test/org/apache/hive/beeline/hs2connection/TestBeelineSiteParser.java b/beeline/src/test/org/apache/hive/beeline/hs2connection/TestBeelineSiteParser.java
new file mode 100644
index 0000000..fc2b44d
--- /dev/null
+++ b/beeline/src/test/org/apache/hive/beeline/hs2connection/TestBeelineSiteParser.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.beeline.hs2connection;
+
+import java.io.File;
+import java.lang.reflect.Field;
+import java.util.Collection;
+import java.util.List;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestBeelineSiteParser {
+    @Test
+    public void testConfigLocationPathInEtc() throws Exception {
+        BeelineSiteParser testHS2ConfigManager =
+                new BeelineSiteParser();
+        Field locations = testHS2ConfigManager.getClass().getDeclaredField("locations");
+        locations.setAccessible(true);
+        Collection<String> locs = (List<String>)locations.get(testHS2ConfigManager);
+        Assert.assertTrue(locs.contains(
+                BeelineSiteParser.ETC_HIVE_CONF_LOCATION +
+                        File.separator +
+                        BeelineSiteParser.DEFAULT_BEELINE_SITE_FILE_NAME));
+
+    }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/5ade7406/beeline/src/test/org/apache/hive/beeline/hs2connection/TestUserHS2ConnectionFileParser.java
----------------------------------------------------------------------
diff --git a/beeline/src/test/org/apache/hive/beeline/hs2connection/TestUserHS2ConnectionFileParser.java b/beeline/src/test/org/apache/hive/beeline/hs2connection/TestUserHS2ConnectionFileParser.java
index f5923d1..78c3a77 100644
--- a/beeline/src/test/org/apache/hive/beeline/hs2connection/TestUserHS2ConnectionFileParser.java
+++ b/beeline/src/test/org/apache/hive/beeline/hs2connection/TestUserHS2ConnectionFileParser.java
@@ -18,7 +18,9 @@
 package org.apache.hive.beeline.hs2connection;
 
 import java.io.File;
+import java.lang.reflect.Field;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.List;
 
 import org.apache.hive.beeline.hs2connection.BeelineHS2ConnectionFileParseException;
@@ -171,6 +173,20 @@ public class TestUserHS2ConnectionFileParser {
         LOCATION_2.equals(testHS2ConfigManager.getFileLocation()));
   }
 
+  @Test
+  public void testConfigLocationPathInEtc() throws Exception {
+    UserHS2ConnectionFileParser testHS2ConfigManager =
+            new UserHS2ConnectionFileParser();
+    Field locations = testHS2ConfigManager.getClass().getDeclaredField("locations");
+    locations.setAccessible(true);
+    Collection<String> locs = (List<String>)locations.get(testHS2ConfigManager);
+    Assert.assertTrue(locs.contains(
+            UserHS2ConnectionFileParser.ETC_HIVE_CONF_LOCATION +
+            File.separator +
+            UserHS2ConnectionFileParser.DEFAULT_CONNECTION_CONFIG_FILE_NAME));
+
+  }
+
   private String getParsedUrlFromConfigFile(String filename)
       throws BeelineHS2ConnectionFileParseException {
     String path = HiveTestUtils.getFileFromClasspath(filename);


[2/3] hive git commit: HIVE-15974: Support real, double precision and numeric data types (Laszlo Bodor via Zoltan Haindrich)

Posted by kg...@apache.org.
HIVE-15974: Support real, double precision and numeric data types (Laszlo Bodor via Zoltan Haindrich)

Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d1185065
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d1185065
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d1185065

Branch: refs/heads/master
Commit: d118506580b0e3a721cae718b28587bb542f63a8
Parents: cac971b
Author: Laszlo Bodor <bo...@gmail.com>
Authored: Thu Jul 12 08:55:00 2018 +0200
Committer: Zoltan Haindrich <ki...@rxd.hu>
Committed: Thu Jul 12 08:55:00 2018 +0200

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/ql/parse/HiveLexer.g     |  1 +
 .../org/apache/hadoop/hive/ql/parse/HiveParser.g    |  3 +++
 .../apache/hadoop/hive/ql/parse/IdentifiersParser.g |  3 ++-
 .../ql/parse/TestSQL11ReservedKeyWordsNegative.java | 13 ++++++++++++-
 ql/src/test/queries/clientpositive/real.q           |  2 ++
 ql/src/test/results/clientpositive/real.q.out       | 16 ++++++++++++++++
 6 files changed, 36 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/d1185065/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
index 43ad7dd..7a63cc4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
@@ -126,6 +126,7 @@ KW_SMALLINT: 'SMALLINT';
 KW_INT: 'INT' | 'INTEGER';
 KW_BIGINT: 'BIGINT';
 KW_FLOAT: 'FLOAT';
+KW_REAL: 'REAL';
 KW_DOUBLE: 'DOUBLE';
 KW_PRECISION: 'PRECISION';
 KW_DATE: 'DATE';

http://git-wip-us.apache.org/repos/asf/hive/blob/d1185065/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index 324c804..75a25d5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -123,6 +123,7 @@ TOK_INT;
 TOK_BIGINT;
 TOK_BOOLEAN;
 TOK_FLOAT;
+TOK_REAL;
 TOK_DOUBLE;
 TOK_DATE;
 TOK_DATELITERAL;
@@ -517,6 +518,7 @@ import org.apache.hadoop.hive.conf.HiveConf;
     xlateMap.put("KW_INT", "INT");
     xlateMap.put("KW_BIGINT", "BIGINT");
     xlateMap.put("KW_FLOAT", "FLOAT");
+    xlateMap.put("KW_REAL", "REAL");
     xlateMap.put("KW_DOUBLE", "DOUBLE");
     xlateMap.put("KW_PRECISION", "PRECISION");
     xlateMap.put("KW_DATE", "DATE");
@@ -2522,6 +2524,7 @@ primitiveType
     | KW_BIGINT        ->    TOK_BIGINT
     | KW_BOOLEAN       ->    TOK_BOOLEAN
     | KW_FLOAT         ->    TOK_FLOAT
+    | KW_REAL         ->     TOK_FLOAT
     | KW_DOUBLE KW_PRECISION?       ->    TOK_DOUBLE
     | KW_DATE          ->    TOK_DATE
     | KW_DATETIME      ->    TOK_DATETIME

http://git-wip-us.apache.org/repos/asf/hive/blob/d1185065/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 7dc6146..9e43ad5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -721,6 +721,7 @@ sysFuncNames
     | KW_INT
     | KW_BIGINT
     | KW_FLOAT
+    | KW_REAL
     | KW_DOUBLE
     | KW_BOOLEAN
     | KW_STRING
@@ -844,5 +845,5 @@ nonReserved
 //The following SQL2011 reserved keywords are used as function name only, but not as identifiers.
 sql11ReservedKeywordsUsedAsFunctionName
     :
-    KW_IF | KW_ARRAY | KW_MAP | KW_BIGINT | KW_BINARY | KW_BOOLEAN | KW_CURRENT_DATE | KW_CURRENT_TIMESTAMP | KW_DATE | KW_DOUBLE | KW_FLOAT | KW_GROUPING | KW_INT | KW_SMALLINT | KW_TIMESTAMP
+    KW_IF | KW_ARRAY | KW_MAP | KW_BIGINT | KW_BINARY | KW_BOOLEAN | KW_CURRENT_DATE | KW_CURRENT_TIMESTAMP | KW_DATE | KW_DOUBLE | KW_FLOAT | KW_REAL | KW_GROUPING | KW_INT | KW_SMALLINT | KW_TIMESTAMP
     ;

http://git-wip-us.apache.org/repos/asf/hive/blob/d1185065/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java
index 2ad2990..ea0f4d6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java
@@ -35,7 +35,7 @@ import org.junit.Test;
  * ,BOOLEAN,BOTH,BY,CONSTRAINT
  * ,CREATE,CUBE,CURRENT_DATE,CURRENT_TIMESTAMP,CURSOR,
  * DATE,DECIMAL,DELETE,DESCRIBE
- * ,DOUBLE,DROP,EXISTS,EXTERNAL,FALSE,FETCH,FLOAT,FOR
+ * ,DOUBLE,DROP,EXISTS,EXTERNAL,FALSE,FETCH,FLOAT,REAL,FOR
  * ,FOREIGN,FULL,GRANT,GROUP,GROUPING
  * ,IMPORT,IN,INNER,INSERT,INT,INTERSECT,INTO,IS
  * ,LATERAL,LEFT,LIKE,LOCAL,MINUS,NONE,NULL
@@ -435,6 +435,17 @@ public class TestSQL11ReservedKeyWordsNegative {
 		}
 	}
 
+  @Test
+  public void testSQL11ReservedKeyWords_REAL() {
+    try {
+      parse("CREATE TABLE REAL (col STRING)");
+      Assert.assertFalse("Expected ParseException", true);
+    } catch (ParseException ex) {
+      Assert.assertEquals("Failure didn't match.",
+          "line 1:13 cannot recognize input near 'REAL' '(' 'col' in table name", ex.getMessage());
+    }
+  }
+
 	@Test
 	public void testSQL11ReservedKeyWords_FOR() {
 		try {

http://git-wip-us.apache.org/repos/asf/hive/blob/d1185065/ql/src/test/queries/clientpositive/real.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/real.q b/ql/src/test/queries/clientpositive/real.q
new file mode 100644
index 0000000..bdc5ff8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/real.q
@@ -0,0 +1,2 @@
+create table realtype (a real, b real);
+describe realtype;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/d1185065/ql/src/test/results/clientpositive/real.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/real.q.out b/ql/src/test/results/clientpositive/real.q.out
new file mode 100644
index 0000000..0a72fba
--- /dev/null
+++ b/ql/src/test/results/clientpositive/real.q.out
@@ -0,0 +1,16 @@
+PREHOOK: query: create table realtype (a real, b real)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@realtype
+POSTHOOK: query: create table realtype (a real, b real)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@realtype
+PREHOOK: query: describe realtype
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@realtype
+POSTHOOK: query: describe realtype
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@realtype
+a                   	float               	                    
+b                   	float