You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kg...@apache.org on 2018/07/12 07:09:02 UTC
[1/3] hive git commit: HIVE-20069: Fix reoptimization in case of DPP
and Semijoin optimization (Zoltan Haindrich reviewed by Ashutosh Chauhan,
Zoltan Haindrich)
Repository: hive
Updated Branches:
refs/heads/master 6ef4a990d -> 5ade74060
HIVE-20069: Fix reoptimization in case of DPP and Semijoin optimization (Zoltan Haindrich reviewed by Ashutosh Chauhan, Zoltan Haindrich)
Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cac971b4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cac971b4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cac971b4
Branch: refs/heads/master
Commit: cac971b43a05a161c2be6ea220dbdf1317b07017
Parents: 6ef4a99
Author: Zoltan Haindrich <ki...@rxd.hu>
Authored: Wed Jul 11 14:48:25 2018 +0200
Committer: Zoltan Haindrich <ki...@rxd.hu>
Committed: Thu Jul 12 08:50:28 2018 +0200
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 2 +
.../hadoop/hive/ql/parse/TezCompiler.java | 72 ++++
.../hive/ql/plan/mapper/StatsSources.java | 9 +-
.../hadoop/hive/ql/stats/OperatorStats.java | 7 +
ql/src/test/queries/clientpositive/reopt_dpp.q | 62 +++
.../queries/clientpositive/reopt_semijoin.q | 76 ++++
.../results/clientpositive/llap/reopt_dpp.q.out | 259 ++++++++++++
.../clientpositive/llap/reopt_semijoin.q.out | 420 +++++++++++++++++++
8 files changed, 905 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/cac971b4/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 72dd144..4001b9f 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -456,6 +456,8 @@ minillaplocal.query.files=\
bucketmapjoin6.q,\
bucketmapjoin7.q,\
bucketpruning1.q,\
+ reopt_dpp.q,\
+ reopt_semijoin.q,\
retry_failure.q,\
retry_failure_stat_changes.q,\
retry_failure_oom.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/cac971b4/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
index dfd7908..119aa92 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
@@ -93,6 +93,7 @@ import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger;
import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
import org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.AppMasterEventDesc;
import org.apache.hadoop.hive.ql.plan.BaseWork;
import org.apache.hadoop.hive.ql.plan.ColStatistics;
import org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc;
@@ -105,8 +106,10 @@ import org.apache.hadoop.hive.ql.plan.MoveWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.Statistics;
import org.apache.hadoop.hive.ql.plan.TezWork;
+import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
+import org.apache.hadoop.hive.ql.stats.OperatorStats;
import org.apache.hadoop.hive.ql.stats.StatsUtils;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator;
import org.slf4j.Logger;
@@ -211,6 +214,10 @@ public class TezCompiler extends TaskCompiler {
}
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Shared scans optimization");
+ perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+ markOperatorsWithUnstableRuntimeStats(procCtx);
+ perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "markOperatorsWithUnstableRuntimeStats");
+
// need a new run of the constant folding because we might have created lots
// of "and true and true" conditions.
// Rather than run the full constant folding just need to shortcut AND/OR expressions
@@ -1006,6 +1013,71 @@ public class TezCompiler extends TaskCompiler {
ogw.startWalking(topNodes, null);
}
+ private static class MarkRuntimeStatsAsIncorrect implements NodeProcessor {
+
+ private PlanMapper planMapper;
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
+ throws SemanticException {
+ ParseContext pCtx = ((OptimizeTezProcContext) procCtx).parseContext;
+ planMapper = pCtx.getContext().getPlanMapper();
+ if (nd instanceof ReduceSinkOperator) {
+ ReduceSinkOperator rs = (ReduceSinkOperator) nd;
+ SemiJoinBranchInfo sjInfo = pCtx.getRsToSemiJoinBranchInfo().get(rs);
+ if (sjInfo == null) {
+ return null;
+ }
+ walkSubtree(sjInfo.getTsOp());
+ }
+ if (nd instanceof AppMasterEventOperator) {
+ AppMasterEventOperator ame = (AppMasterEventOperator) nd;
+ AppMasterEventDesc c = ame.getConf();
+ if (c instanceof DynamicPruningEventDesc) {
+ DynamicPruningEventDesc dped = (DynamicPruningEventDesc) c;
+ mark(dped.getTableScan());
+ }
+ }
+ return null;
+ }
+
+ private void walkSubtree(Operator<?> root) {
+ Deque<Operator<?>> deque = new LinkedList<>();
+ deque.add(root);
+ while (!deque.isEmpty()) {
+ Operator<?> op = deque.pollLast();
+ mark(op);
+ if (op instanceof ReduceSinkOperator) {
+ // Done with this branch
+ } else {
+ deque.addAll(op.getChildOperators());
+ }
+ }
+ }
+
+ private void mark(Operator<?> op) {
+ planMapper.link(op, new OperatorStats.IncorrectRuntimeStatsMarker());
+ }
+
+ }
+
+ private void markOperatorsWithUnstableRuntimeStats(OptimizeTezProcContext procCtx) throws SemanticException {
+ Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+ opRules.put(
+ new RuleRegExp("R1",
+ ReduceSinkOperator.getOperatorName() + "%"),
+ new MarkRuntimeStatsAsIncorrect());
+ opRules.put(
+ new RuleRegExp("R2",
+ AppMasterEventOperator.getOperatorName() + "%"),
+ new MarkRuntimeStatsAsIncorrect());
+ Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
+ List<Node> topNodes = new ArrayList<Node>();
+ topNodes.addAll(procCtx.parseContext.getTopOps().values());
+ GraphWalker ogw = new PreOrderOnceWalker(disp);
+ ogw.startWalking(topNodes, null);
+ }
+
private boolean findParallelSemiJoinBranch(Operator<?> mapjoin, TableScanOperator bigTableTS,
ParseContext parseContext,
Map<ReduceSinkOperator, TableScanOperator> semijoins) {
http://git-wip-us.apache.org/repos/asf/hive/blob/cac971b4/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/StatsSources.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/StatsSources.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/StatsSources.java
index 5a62046..823cb87 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/StatsSources.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/StatsSources.java
@@ -95,9 +95,14 @@ public class StatsSources {
}
LOG.debug(sb.toString());
}
- if (stat.size() >= 1 && sig.size() >= 1) {
- map.put(sig.get(0), stat.get(0));
+ if (stat.size() < 1 || sig.size() < 1) {
+ continue;
}
+ if (e.getAll(OperatorStats.IncorrectRuntimeStatsMarker.class).size() > 0) {
+ LOG.debug("Ignoring {}, marked with OperatorStats.IncorrectRuntimeStatsMarker", sig.get(0));
+ continue;
+ }
+ map.put(sig.get(0), stat.get(0));
}
return map.build();
}
http://git-wip-us.apache.org/repos/asf/hive/blob/cac971b4/ql/src/java/org/apache/hadoop/hive/ql/stats/OperatorStats.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/OperatorStats.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/OperatorStats.java
index d70bb82..0c56c82 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/OperatorStats.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/OperatorStats.java
@@ -23,6 +23,11 @@ import com.google.common.base.Objects;
* Holds information about an operator's statistics.
*/
public final class OperatorStats {
+
+ /** Marker class to help with plan elements which will collect invalid statistics. */
+ public static class IncorrectRuntimeStatsMarker {
+ }
+
private String operatorId;
private long outputRecords;
@@ -67,4 +72,6 @@ public final class OperatorStats {
return Objects.equal(operatorId, o.operatorId) &&
Objects.equal(outputRecords, o.outputRecords);
}
+
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/cac971b4/ql/src/test/queries/clientpositive/reopt_dpp.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/reopt_dpp.q b/ql/src/test/queries/clientpositive/reopt_dpp.q
new file mode 100644
index 0000000..952dcbe
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/reopt_dpp.q
@@ -0,0 +1,62 @@
+set hive.explain.user=true;
+set hive.optimize.index.filter=true;
+set hive.auto.convert.join=true;
+set hive.vectorized.execution.enabled=true;
+
+drop table if exists x1_store_sales;
+drop table if exists x1_date_dim;
+drop table if exists x1_item;
+
+create table x1_store_sales
+(
+ ss_item_sk int
+)
+partitioned by (ss_sold_date_sk int)
+stored as orc;
+
+create table x1_date_dim
+(
+ d_date_sk int,
+ d_month_seq int,
+ d_year int,
+ d_moy int
+)
+stored as orc;
+
+
+insert into x1_date_dim values (1,1,2000,2),
+ (2,2,2001,2);
+insert into x1_store_sales partition (ss_sold_date_sk=1) values (1);
+insert into x1_store_sales partition (ss_sold_date_sk=2) values (2);
+
+alter table x1_store_sales partition (ss_sold_date_sk=1) update statistics set(
+'numRows'='123456',
+'rawDataSize'='1234567');
+
+alter table x1_date_dim update statistics set(
+'numRows'='56',
+'rawDataSize'='81449');
+
+
+-- the following query is designed to produce a DPP plan
+explain
+select count(*) cnt
+ from
+ x1_store_sales s
+ ,x1_date_dim d
+ where
+ 1=1
+ and s.ss_sold_date_sk = d.d_date_sk
+ and d.d_year=2000;
+
+-- tablescan of s should be 2 or 123456 rows; but never 1
+-- and it should not be a mapjoin :)
+explain reoptimization
+select count(*) cnt
+ from
+ x1_store_sales s
+ ,x1_date_dim d
+ where
+ 1=1
+ and s.ss_sold_date_sk = d.d_date_sk
+ and d.d_year=2000;
http://git-wip-us.apache.org/repos/asf/hive/blob/cac971b4/ql/src/test/queries/clientpositive/reopt_semijoin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/reopt_semijoin.q b/ql/src/test/queries/clientpositive/reopt_semijoin.q
new file mode 100644
index 0000000..0eacb8a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/reopt_semijoin.q
@@ -0,0 +1,76 @@
+set hive.explain.user=true;
+set hive.optimize.index.filter=true;
+set hive.auto.convert.join=true;
+set hive.vectorized.execution.enabled=true;
+
+drop table if exists x1_store_sales;
+drop table if exists x1_date_dim;
+
+create table x1_store_sales
+(
+ ss_sold_date_sk int,
+ ss_item_sk int
+)
+stored as orc;
+
+create table x1_date_dim
+(
+ d_date_sk int,
+ d_month_seq int,
+ d_year int,
+ d_moy int
+)
+stored as orc;
+
+insert into x1_date_dim values (1,1,2000,1),
+ (2,2,2001,2),
+ (3,2,2001,3),
+ (4,2,2001,4),
+ (5,2,2001,5),
+ (6,2,2001,6),
+ (7,2,2001,7),
+ (8,2,2001,8);
+
+insert into x1_store_sales values (1,1),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10),(11,11);
+
+alter table x1_store_sales update statistics set(
+'numRows'='123456',
+'rawDataSize'='1234567');
+
+alter table x1_date_dim update statistics set(
+'numRows'='56',
+'rawDataSize'='81449');
+
+
+set hive.auto.convert.join.noconditionaltask.size=1;
+set hive.tez.dynamic.partition.pruning=true;
+set hive.tez.dynamic.semijoin.reduction=true;
+set hive.optimize.index.filter=true;
+set hive.tez.bigtable.minsize.semijoin.reduction=1;
+set hive.tez.min.bloom.filter.entries=1;
+set hive.tez.bloom.filter.factor=1.0f;
+set hive.explain.user=false;
+
+explain
+select sum(s.ss_item_sk)
+ from
+ x1_store_sales s
+ ,x1_date_dim d
+ where
+ 1=1
+ and s.ss_sold_date_sk=d.d_date_sk
+ and d.d_moy=3
+;
+
+explain reoptimization
+select sum(s.ss_item_sk)
+ from
+ x1_store_sales s
+ ,x1_date_dim d
+ where
+ 1=1
+ and s.ss_sold_date_sk=d.d_date_sk
+ and d.d_moy=3
+;
+
+
http://git-wip-us.apache.org/repos/asf/hive/blob/cac971b4/ql/src/test/results/clientpositive/llap/reopt_dpp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/reopt_dpp.q.out b/ql/src/test/results/clientpositive/llap/reopt_dpp.q.out
new file mode 100644
index 0000000..31726f6
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/reopt_dpp.q.out
@@ -0,0 +1,259 @@
+PREHOOK: query: drop table if exists x1_store_sales
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists x1_store_sales
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists x1_date_dim
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists x1_date_dim
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists x1_item
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists x1_item
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table x1_store_sales
+(
+ ss_item_sk int
+)
+partitioned by (ss_sold_date_sk int)
+stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@x1_store_sales
+POSTHOOK: query: create table x1_store_sales
+(
+ ss_item_sk int
+)
+partitioned by (ss_sold_date_sk int)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@x1_store_sales
+PREHOOK: query: create table x1_date_dim
+(
+ d_date_sk int,
+ d_month_seq int,
+ d_year int,
+ d_moy int
+)
+stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@x1_date_dim
+POSTHOOK: query: create table x1_date_dim
+(
+ d_date_sk int,
+ d_month_seq int,
+ d_year int,
+ d_moy int
+)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@x1_date_dim
+PREHOOK: query: insert into x1_date_dim values (1,1,2000,2),
+ (2,2,2001,2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@x1_date_dim
+POSTHOOK: query: insert into x1_date_dim values (1,1,2000,2),
+ (2,2,2001,2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@x1_date_dim
+POSTHOOK: Lineage: x1_date_dim.d_date_sk SCRIPT []
+POSTHOOK: Lineage: x1_date_dim.d_month_seq SCRIPT []
+POSTHOOK: Lineage: x1_date_dim.d_moy SCRIPT []
+POSTHOOK: Lineage: x1_date_dim.d_year SCRIPT []
+PREHOOK: query: insert into x1_store_sales partition (ss_sold_date_sk=1) values (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@x1_store_sales@ss_sold_date_sk=1
+POSTHOOK: query: insert into x1_store_sales partition (ss_sold_date_sk=1) values (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@x1_store_sales@ss_sold_date_sk=1
+POSTHOOK: Lineage: x1_store_sales PARTITION(ss_sold_date_sk=1).ss_item_sk SCRIPT []
+PREHOOK: query: insert into x1_store_sales partition (ss_sold_date_sk=2) values (2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@x1_store_sales@ss_sold_date_sk=2
+POSTHOOK: query: insert into x1_store_sales partition (ss_sold_date_sk=2) values (2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@x1_store_sales@ss_sold_date_sk=2
+POSTHOOK: Lineage: x1_store_sales PARTITION(ss_sold_date_sk=2).ss_item_sk SCRIPT []
+PREHOOK: query: alter table x1_store_sales partition (ss_sold_date_sk=1) update statistics set(
+'numRows'='123456',
+'rawDataSize'='1234567')
+PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS
+PREHOOK: Input: default@x1_store_sales
+PREHOOK: Output: default@x1_store_sales@ss_sold_date_sk=1
+POSTHOOK: query: alter table x1_store_sales partition (ss_sold_date_sk=1) update statistics set(
+'numRows'='123456',
+'rawDataSize'='1234567')
+POSTHOOK: type: ALTERTABLE_UPDATEPARTSTATS
+POSTHOOK: Input: default@x1_store_sales
+POSTHOOK: Input: default@x1_store_sales@ss_sold_date_sk=1
+POSTHOOK: Output: default@x1_store_sales@ss_sold_date_sk=1
+PREHOOK: query: alter table x1_date_dim update statistics set(
+'numRows'='56',
+'rawDataSize'='81449')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@x1_date_dim
+PREHOOK: Output: default@x1_date_dim
+POSTHOOK: query: alter table x1_date_dim update statistics set(
+'numRows'='56',
+'rawDataSize'='81449')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@x1_date_dim
+POSTHOOK: Output: default@x1_date_dim
+PREHOOK: query: explain
+select count(*) cnt
+ from
+ x1_store_sales s
+ ,x1_date_dim d
+ where
+ 1=1
+ and s.ss_sold_date_sk = d.d_date_sk
+ and d.d_year=2000
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) cnt
+ from
+ x1_store_sales s
+ ,x1_date_dim d
+ where
+ 1=1
+ and s.ss_sold_date_sk = d.d_date_sk
+ and d.d_year=2000
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Map 1 <- Map 3 (BROADCAST_EDGE)
+Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2 vectorized, llap
+ File Output Operator [FS_35]
+ Group By Operator [GBY_34] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count(VALUE._col0)"]
+ <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap
+ PARTITION_ONLY_SHUFFLE [RS_33]
+ Group By Operator [GBY_32] (rows=1 width=8)
+ Output:["_col0"],aggregations:["count()"]
+ Map Join Operator [MAPJOIN_31] (rows=1728398 width=8)
+ Conds:SEL_30._col0=RS_26._col0(Inner)
+ <-Map 3 [BROADCAST_EDGE] vectorized, llap
+ BROADCAST [RS_26]
+ PartitionCols:_col0
+ Select Operator [SEL_25] (rows=28 width=8)
+ Output:["_col0"]
+ Filter Operator [FIL_24] (rows=28 width=8)
+ predicate:((d_year = 2000) and d_date_sk is not null)
+ TableScan [TS_3] (rows=56 width=8)
+ default@x1_date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"]
+ Dynamic Partitioning Event Operator [EVENT_29] (rows=1 width=8)
+ Group By Operator [GBY_28] (rows=1 width=8)
+ Output:["_col0"],keys:_col0
+ Select Operator [SEL_27] (rows=28 width=8)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_25]
+ <-Select Operator [SEL_30] (rows=123457 width=4)
+ Output:["_col0"]
+ TableScan [TS_0] (rows=123457 width=14)
+ default@x1_store_sales,s,Tbl:COMPLETE,Col:COMPLETE
+
+PREHOOK: query: explain reoptimization
+select count(*) cnt
+ from
+ x1_store_sales s
+ ,x1_date_dim d
+ where
+ 1=1
+ and s.ss_sold_date_sk = d.d_date_sk
+ and d.d_year=2000
+PREHOOK: type: QUERY
+PREHOOK: Input: default@x1_date_dim
+PREHOOK: Input: default@x1_store_sales
+PREHOOK: Input: default@x1_store_sales@ss_sold_date_sk=1
+PREHOOK: Input: default@x1_store_sales@ss_sold_date_sk=2
+#### A masked pattern was here ####
+POSTHOOK: query: explain reoptimization
+select count(*) cnt
+ from
+ x1_store_sales s
+ ,x1_date_dim d
+ where
+ 1=1
+ and s.ss_sold_date_sk = d.d_date_sk
+ and d.d_year=2000
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@x1_date_dim
+POSTHOOK: Input: default@x1_store_sales
+POSTHOOK: Input: default@x1_store_sales@ss_sold_date_sk=1
+POSTHOOK: Input: default@x1_store_sales@ss_sold_date_sk=2
+#### A masked pattern was here ####
+PREHOOK: query: explain reoptimization
+select count(*) cnt
+ from
+ x1_store_sales s
+ ,x1_date_dim d
+ where
+ 1=1
+ and s.ss_sold_date_sk = d.d_date_sk
+ and d.d_year=2000
+PREHOOK: type: QUERY
+POSTHOOK: query: explain reoptimization
+select count(*) cnt
+ from
+ x1_store_sales s
+ ,x1_date_dim d
+ where
+ 1=1
+ and s.ss_sold_date_sk = d.d_date_sk
+ and d.d_year=2000
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Map 1 <- Map 3 (BROADCAST_EDGE)
+Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2 vectorized, llap
+ File Output Operator [FS_35]
+ Group By Operator [GBY_34] (runtime: rows=1 width=8)
+ Output:["_col0"],aggregations:["count(VALUE._col0)"]
+ <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap
+ PARTITION_ONLY_SHUFFLE [RS_33]
+ Group By Operator [GBY_32] (runtime: rows=1 width=8)
+ Output:["_col0"],aggregations:["count()"]
+ Map Join Operator [MAPJOIN_31] (runtime: rows=1 width=8)
+ Conds:SEL_30._col0=RS_26._col0(Inner)
+ <-Map 3 [BROADCAST_EDGE] vectorized, llap
+ BROADCAST [RS_26]
+ PartitionCols:_col0
+ Select Operator [SEL_25] (runtime: rows=1 width=8)
+ Output:["_col0"]
+ Filter Operator [FIL_24] (runtime: rows=1 width=8)
+ predicate:((d_year = 2000) and d_date_sk is not null)
+ TableScan [TS_3] (runtime: rows=2 width=8)
+ default@x1_date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"]
+ Dynamic Partitioning Event Operator [EVENT_29] (runtime: rows=1 width=8)
+ Group By Operator [GBY_28] (runtime: rows=1 width=8)
+ Output:["_col0"],keys:_col0
+ Select Operator [SEL_27] (runtime: rows=1 width=8)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_25]
+ <-Select Operator [SEL_30] (runtime: rows=1 width=4)
+ Output:["_col0"]
+ TableScan [TS_0] (rows=123457 width=14)
+ default@x1_store_sales,s,Tbl:COMPLETE,Col:COMPLETE
+
http://git-wip-us.apache.org/repos/asf/hive/blob/cac971b4/ql/src/test/results/clientpositive/llap/reopt_semijoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/reopt_semijoin.q.out b/ql/src/test/results/clientpositive/llap/reopt_semijoin.q.out
new file mode 100644
index 0000000..e60b207
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/reopt_semijoin.q.out
@@ -0,0 +1,420 @@
+PREHOOK: query: drop table if exists x1_store_sales
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists x1_store_sales
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists x1_date_dim
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists x1_date_dim
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table x1_store_sales
+(
+ ss_sold_date_sk int,
+ ss_item_sk int
+)
+stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@x1_store_sales
+POSTHOOK: query: create table x1_store_sales
+(
+ ss_sold_date_sk int,
+ ss_item_sk int
+)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@x1_store_sales
+PREHOOK: query: create table x1_date_dim
+(
+ d_date_sk int,
+ d_month_seq int,
+ d_year int,
+ d_moy int
+)
+stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@x1_date_dim
+POSTHOOK: query: create table x1_date_dim
+(
+ d_date_sk int,
+ d_month_seq int,
+ d_year int,
+ d_moy int
+)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@x1_date_dim
+PREHOOK: query: insert into x1_date_dim values (1,1,2000,1),
+ (2,2,2001,2),
+ (3,2,2001,3),
+ (4,2,2001,4),
+ (5,2,2001,5),
+ (6,2,2001,6),
+ (7,2,2001,7),
+ (8,2,2001,8)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@x1_date_dim
+POSTHOOK: query: insert into x1_date_dim values (1,1,2000,1),
+ (2,2,2001,2),
+ (3,2,2001,3),
+ (4,2,2001,4),
+ (5,2,2001,5),
+ (6,2,2001,6),
+ (7,2,2001,7),
+ (8,2,2001,8)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@x1_date_dim
+POSTHOOK: Lineage: x1_date_dim.d_date_sk SCRIPT []
+POSTHOOK: Lineage: x1_date_dim.d_month_seq SCRIPT []
+POSTHOOK: Lineage: x1_date_dim.d_moy SCRIPT []
+POSTHOOK: Lineage: x1_date_dim.d_year SCRIPT []
+PREHOOK: query: insert into x1_store_sales values (1,1),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10),(11,11)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@x1_store_sales
+POSTHOOK: query: insert into x1_store_sales values (1,1),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9),(10,10),(11,11)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@x1_store_sales
+POSTHOOK: Lineage: x1_store_sales.ss_item_sk SCRIPT []
+POSTHOOK: Lineage: x1_store_sales.ss_sold_date_sk SCRIPT []
+PREHOOK: query: alter table x1_store_sales update statistics set(
+'numRows'='123456',
+'rawDataSize'='1234567')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@x1_store_sales
+PREHOOK: Output: default@x1_store_sales
+POSTHOOK: query: alter table x1_store_sales update statistics set(
+'numRows'='123456',
+'rawDataSize'='1234567')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@x1_store_sales
+POSTHOOK: Output: default@x1_store_sales
+PREHOOK: query: alter table x1_date_dim update statistics set(
+'numRows'='56',
+'rawDataSize'='81449')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@x1_date_dim
+PREHOOK: Output: default@x1_date_dim
+POSTHOOK: query: alter table x1_date_dim update statistics set(
+'numRows'='56',
+'rawDataSize'='81449')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@x1_date_dim
+POSTHOOK: Output: default@x1_date_dim
+PREHOOK: query: explain
+select sum(s.ss_item_sk)
+ from
+ x1_store_sales s
+ ,x1_date_dim d
+ where
+ 1=1
+ and s.ss_sold_date_sk=d.d_date_sk
+ and d.d_moy=3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select sum(s.ss_item_sk)
+ from
+ x1_store_sales s
+ ,x1_date_dim d
+ where
+ 1=1
+ and s.ss_sold_date_sk=d.d_date_sk
+ and d.d_moy=3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Reducer 5 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s
+ filterExpr: (ss_sold_date_sk is not null and (ss_sold_date_sk BETWEEN DynamicValue(RS_7_d_d_date_sk_min) AND DynamicValue(RS_7_d_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_d_d_date_sk_bloom_filter)))) (type: boolean)
+ Statistics: Num rows: 123456 Data size: 987648 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((ss_sold_date_sk BETWEEN DynamicValue(RS_7_d_d_date_sk_min) AND DynamicValue(RS_7_d_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_d_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 123456 Data size: 987648 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 123456 Data size: 987648 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 123456 Data size: 987648 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: d
+ filterExpr: ((d_moy = 3) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 56 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((d_moy = 3) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ Statistics: Num rows: 86419 Data size: 345676 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain reoptimization
+select sum(s.ss_item_sk)
+ from
+ x1_store_sales s
+ ,x1_date_dim d
+ where
+ 1=1
+ and s.ss_sold_date_sk=d.d_date_sk
+ and d.d_moy=3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@x1_date_dim
+PREHOOK: Input: default@x1_store_sales
+#### A masked pattern was here ####
+POSTHOOK: query: explain reoptimization
+select sum(s.ss_item_sk)
+ from
+ x1_store_sales s
+ ,x1_date_dim d
+ where
+ 1=1
+ and s.ss_sold_date_sk=d.d_date_sk
+ and d.d_moy=3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@x1_date_dim
+POSTHOOK: Input: default@x1_store_sales
+#### A masked pattern was here ####
+PREHOOK: query: explain reoptimization
+select sum(s.ss_item_sk)
+ from
+ x1_store_sales s
+ ,x1_date_dim d
+ where
+ 1=1
+ and s.ss_sold_date_sk=d.d_date_sk
+ and d.d_moy=3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain reoptimization
+select sum(s.ss_item_sk)
+ from
+ x1_store_sales s
+ ,x1_date_dim d
+ where
+ 1=1
+ and s.ss_sold_date_sk=d.d_date_sk
+ and d.d_moy=3
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Reducer 5 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: s
+ filterExpr: (ss_sold_date_sk is not null and (ss_sold_date_sk BETWEEN DynamicValue(RS_7_d_d_date_sk_min) AND DynamicValue(RS_7_d_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_d_d_date_sk_bloom_filter)))) (type: boolean)
+ Statistics: Num rows: 123456 Data size: 987648 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((ss_sold_date_sk BETWEEN DynamicValue(RS_7_d_d_date_sk_min) AND DynamicValue(RS_7_d_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_d_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 123456 Data size: 987648 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 123456 Data size: 987648 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 123456 Data size: 987648 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: d
+ filterExpr: ((d_moy = 3) and d_date_sk is not null) (type: boolean)
+ Statistics: (RUNTIME) Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((d_moy = 3) and d_date_sk is not null) (type: boolean)
+ Statistics: (RUNTIME) Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: d_date_sk (type: int)
+ outputColumnNames: _col0
+ Statistics: (RUNTIME) Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: (RUNTIME) Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: (RUNTIME) Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: (RUNTIME) Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: (RUNTIME) Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col1
+ Statistics: (RUNTIME) Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: (RUNTIME) Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: (RUNTIME) Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: (RUNTIME) Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: (RUNTIME) Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: (RUNTIME) Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
[3/3] hive git commit: HIVE-20088: Beeline config location path is
assembled incorrectly (Denes Bodo via Zoltan Haindrich)
Posted by kg...@apache.org.
HIVE-20088: Beeline config location path is assembled incorrectly (Denes Bodo via Zoltan Haindrich)
Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5ade7406
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5ade7406
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5ade7406
Branch: refs/heads/master
Commit: 5ade74060c9615a658d66fb0dac397671d2368ba
Parents: d118506
Author: Denes Bodo <bo...@gmail.com>
Authored: Thu Jul 12 09:01:17 2018 +0200
Committer: Zoltan Haindrich <ki...@rxd.hu>
Committed: Thu Jul 12 09:01:17 2018 +0200
----------------------------------------------------------------------
.../hs2connection/BeelineSiteParser.java | 2 +-
.../UserHS2ConnectionFileParser.java | 2 +-
.../hs2connection/TestBeelineSiteParser.java | 41 ++++++++++++++++++++
.../TestUserHS2ConnectionFileParser.java | 16 ++++++++
4 files changed, 59 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/5ade7406/beeline/src/java/org/apache/hive/beeline/hs2connection/BeelineSiteParser.java
----------------------------------------------------------------------
diff --git a/beeline/src/java/org/apache/hive/beeline/hs2connection/BeelineSiteParser.java b/beeline/src/java/org/apache/hive/beeline/hs2connection/BeelineSiteParser.java
index 600d84e..4c55104 100644
--- a/beeline/src/java/org/apache/hive/beeline/hs2connection/BeelineSiteParser.java
+++ b/beeline/src/java/org/apache/hive/beeline/hs2connection/BeelineSiteParser.java
@@ -63,7 +63,7 @@ public class BeelineSiteParser implements HS2ConnectionFileParser {
locations
.add(System.getenv("HIVE_CONF_DIR") + File.separator + DEFAULT_BEELINE_SITE_FILE_NAME);
}
- locations.add(ETC_HIVE_CONF_LOCATION + DEFAULT_BEELINE_SITE_FILE_NAME);
+ locations.add(ETC_HIVE_CONF_LOCATION + File.separator + DEFAULT_BEELINE_SITE_FILE_NAME);
}
@VisibleForTesting
http://git-wip-us.apache.org/repos/asf/hive/blob/5ade7406/beeline/src/java/org/apache/hive/beeline/hs2connection/UserHS2ConnectionFileParser.java
----------------------------------------------------------------------
diff --git a/beeline/src/java/org/apache/hive/beeline/hs2connection/UserHS2ConnectionFileParser.java b/beeline/src/java/org/apache/hive/beeline/hs2connection/UserHS2ConnectionFileParser.java
index 9d45daf..47dee4c 100644
--- a/beeline/src/java/org/apache/hive/beeline/hs2connection/UserHS2ConnectionFileParser.java
+++ b/beeline/src/java/org/apache/hive/beeline/hs2connection/UserHS2ConnectionFileParser.java
@@ -56,7 +56,7 @@ public class UserHS2ConnectionFileParser implements HS2ConnectionFileParser {
locations.add(
System.getenv("HIVE_CONF_DIR") + File.separator + DEFAULT_CONNECTION_CONFIG_FILE_NAME);
}
- locations.add(ETC_HIVE_CONF_LOCATION + DEFAULT_CONNECTION_CONFIG_FILE_NAME);
+ locations.add(ETC_HIVE_CONF_LOCATION + File.separator + DEFAULT_CONNECTION_CONFIG_FILE_NAME);
}
@VisibleForTesting
http://git-wip-us.apache.org/repos/asf/hive/blob/5ade7406/beeline/src/test/org/apache/hive/beeline/hs2connection/TestBeelineSiteParser.java
----------------------------------------------------------------------
diff --git a/beeline/src/test/org/apache/hive/beeline/hs2connection/TestBeelineSiteParser.java b/beeline/src/test/org/apache/hive/beeline/hs2connection/TestBeelineSiteParser.java
new file mode 100644
index 0000000..fc2b44d
--- /dev/null
+++ b/beeline/src/test/org/apache/hive/beeline/hs2connection/TestBeelineSiteParser.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hive.beeline.hs2connection;
+
+import java.io.File;
+import java.lang.reflect.Field;
+import java.util.Collection;
+import java.util.List;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestBeelineSiteParser {
+ @Test
+ public void testConfigLocationPathInEtc() throws Exception {
+ BeelineSiteParser testHS2ConfigManager =
+ new BeelineSiteParser();
+ Field locations = testHS2ConfigManager.getClass().getDeclaredField("locations");
+ locations.setAccessible(true);
+ Collection<String> locs = (List<String>)locations.get(testHS2ConfigManager);
+ Assert.assertTrue(locs.contains(
+ BeelineSiteParser.ETC_HIVE_CONF_LOCATION +
+ File.separator +
+ BeelineSiteParser.DEFAULT_BEELINE_SITE_FILE_NAME));
+
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/5ade7406/beeline/src/test/org/apache/hive/beeline/hs2connection/TestUserHS2ConnectionFileParser.java
----------------------------------------------------------------------
diff --git a/beeline/src/test/org/apache/hive/beeline/hs2connection/TestUserHS2ConnectionFileParser.java b/beeline/src/test/org/apache/hive/beeline/hs2connection/TestUserHS2ConnectionFileParser.java
index f5923d1..78c3a77 100644
--- a/beeline/src/test/org/apache/hive/beeline/hs2connection/TestUserHS2ConnectionFileParser.java
+++ b/beeline/src/test/org/apache/hive/beeline/hs2connection/TestUserHS2ConnectionFileParser.java
@@ -18,7 +18,9 @@
package org.apache.hive.beeline.hs2connection;
import java.io.File;
+import java.lang.reflect.Field;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
import org.apache.hive.beeline.hs2connection.BeelineHS2ConnectionFileParseException;
@@ -171,6 +173,20 @@ public class TestUserHS2ConnectionFileParser {
LOCATION_2.equals(testHS2ConfigManager.getFileLocation()));
}
+ @Test
+ public void testConfigLocationPathInEtc() throws Exception {
+ UserHS2ConnectionFileParser testHS2ConfigManager =
+ new UserHS2ConnectionFileParser();
+ Field locations = testHS2ConfigManager.getClass().getDeclaredField("locations");
+ locations.setAccessible(true);
+ Collection<String> locs = (List<String>)locations.get(testHS2ConfigManager);
+ Assert.assertTrue(locs.contains(
+ UserHS2ConnectionFileParser.ETC_HIVE_CONF_LOCATION +
+ File.separator +
+ UserHS2ConnectionFileParser.DEFAULT_CONNECTION_CONFIG_FILE_NAME));
+
+ }
+
private String getParsedUrlFromConfigFile(String filename)
throws BeelineHS2ConnectionFileParseException {
String path = HiveTestUtils.getFileFromClasspath(filename);
[2/3] hive git commit: HIVE-15974: Support real,
double precision and numeric data types (Laszlo Bodor via Zoltan
Haindrich)
Posted by kg...@apache.org.
HIVE-15974: Support real, double precision and numeric data types (Laszlo Bodor via Zoltan Haindrich)
Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d1185065
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d1185065
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d1185065
Branch: refs/heads/master
Commit: d118506580b0e3a721cae718b28587bb542f63a8
Parents: cac971b
Author: Laszlo Bodor <bo...@gmail.com>
Authored: Thu Jul 12 08:55:00 2018 +0200
Committer: Zoltan Haindrich <ki...@rxd.hu>
Committed: Thu Jul 12 08:55:00 2018 +0200
----------------------------------------------------------------------
.../org/apache/hadoop/hive/ql/parse/HiveLexer.g | 1 +
.../org/apache/hadoop/hive/ql/parse/HiveParser.g | 3 +++
.../apache/hadoop/hive/ql/parse/IdentifiersParser.g | 3 ++-
.../ql/parse/TestSQL11ReservedKeyWordsNegative.java | 13 ++++++++++++-
ql/src/test/queries/clientpositive/real.q | 2 ++
ql/src/test/results/clientpositive/real.q.out | 16 ++++++++++++++++
6 files changed, 36 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/d1185065/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
index 43ad7dd..7a63cc4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
@@ -126,6 +126,7 @@ KW_SMALLINT: 'SMALLINT';
KW_INT: 'INT' | 'INTEGER';
KW_BIGINT: 'BIGINT';
KW_FLOAT: 'FLOAT';
+KW_REAL: 'REAL';
KW_DOUBLE: 'DOUBLE';
KW_PRECISION: 'PRECISION';
KW_DATE: 'DATE';
http://git-wip-us.apache.org/repos/asf/hive/blob/d1185065/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index 324c804..75a25d5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -123,6 +123,7 @@ TOK_INT;
TOK_BIGINT;
TOK_BOOLEAN;
TOK_FLOAT;
+TOK_REAL;
TOK_DOUBLE;
TOK_DATE;
TOK_DATELITERAL;
@@ -517,6 +518,7 @@ import org.apache.hadoop.hive.conf.HiveConf;
xlateMap.put("KW_INT", "INT");
xlateMap.put("KW_BIGINT", "BIGINT");
xlateMap.put("KW_FLOAT", "FLOAT");
+ xlateMap.put("KW_REAL", "REAL");
xlateMap.put("KW_DOUBLE", "DOUBLE");
xlateMap.put("KW_PRECISION", "PRECISION");
xlateMap.put("KW_DATE", "DATE");
@@ -2522,6 +2524,7 @@ primitiveType
| KW_BIGINT -> TOK_BIGINT
| KW_BOOLEAN -> TOK_BOOLEAN
| KW_FLOAT -> TOK_FLOAT
+ | KW_REAL -> TOK_FLOAT
| KW_DOUBLE KW_PRECISION? -> TOK_DOUBLE
| KW_DATE -> TOK_DATE
| KW_DATETIME -> TOK_DATETIME
http://git-wip-us.apache.org/repos/asf/hive/blob/d1185065/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 7dc6146..9e43ad5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -721,6 +721,7 @@ sysFuncNames
| KW_INT
| KW_BIGINT
| KW_FLOAT
+ | KW_REAL
| KW_DOUBLE
| KW_BOOLEAN
| KW_STRING
@@ -844,5 +845,5 @@ nonReserved
//The following SQL2011 reserved keywords are used as function name only, but not as identifiers.
sql11ReservedKeywordsUsedAsFunctionName
:
- KW_IF | KW_ARRAY | KW_MAP | KW_BIGINT | KW_BINARY | KW_BOOLEAN | KW_CURRENT_DATE | KW_CURRENT_TIMESTAMP | KW_DATE | KW_DOUBLE | KW_FLOAT | KW_GROUPING | KW_INT | KW_SMALLINT | KW_TIMESTAMP
+ KW_IF | KW_ARRAY | KW_MAP | KW_BIGINT | KW_BINARY | KW_BOOLEAN | KW_CURRENT_DATE | KW_CURRENT_TIMESTAMP | KW_DATE | KW_DOUBLE | KW_FLOAT | KW_REAL | KW_GROUPING | KW_INT | KW_SMALLINT | KW_TIMESTAMP
;
http://git-wip-us.apache.org/repos/asf/hive/blob/d1185065/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java
index 2ad2990..ea0f4d6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java
@@ -35,7 +35,7 @@ import org.junit.Test;
* ,BOOLEAN,BOTH,BY,CONSTRAINT
* ,CREATE,CUBE,CURRENT_DATE,CURRENT_TIMESTAMP,CURSOR,
* DATE,DECIMAL,DELETE,DESCRIBE
- * ,DOUBLE,DROP,EXISTS,EXTERNAL,FALSE,FETCH,FLOAT,FOR
+ * ,DOUBLE,DROP,EXISTS,EXTERNAL,FALSE,FETCH,FLOAT,REAL,FOR
* ,FOREIGN,FULL,GRANT,GROUP,GROUPING
* ,IMPORT,IN,INNER,INSERT,INT,INTERSECT,INTO,IS
* ,LATERAL,LEFT,LIKE,LOCAL,MINUS,NONE,NULL
@@ -435,6 +435,17 @@ public class TestSQL11ReservedKeyWordsNegative {
}
}
+ @Test
+ public void testSQL11ReservedKeyWords_REAL() {
+ try {
+ parse("CREATE TABLE REAL (col STRING)");
+ Assert.assertFalse("Expected ParseException", true);
+ } catch (ParseException ex) {
+ Assert.assertEquals("Failure didn't match.",
+ "line 1:13 cannot recognize input near 'REAL' '(' 'col' in table name", ex.getMessage());
+ }
+ }
+
@Test
public void testSQL11ReservedKeyWords_FOR() {
try {
http://git-wip-us.apache.org/repos/asf/hive/blob/d1185065/ql/src/test/queries/clientpositive/real.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/real.q b/ql/src/test/queries/clientpositive/real.q
new file mode 100644
index 0000000..bdc5ff8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/real.q
@@ -0,0 +1,2 @@
+create table realtype (a real, b real);
+describe realtype;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/d1185065/ql/src/test/results/clientpositive/real.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/real.q.out b/ql/src/test/results/clientpositive/real.q.out
new file mode 100644
index 0000000..0a72fba
--- /dev/null
+++ b/ql/src/test/results/clientpositive/real.q.out
@@ -0,0 +1,16 @@
+PREHOOK: query: create table realtype (a real, b real)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@realtype
+POSTHOOK: query: create table realtype (a real, b real)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@realtype
+PREHOOK: query: describe realtype
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@realtype
+POSTHOOK: query: describe realtype
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@realtype
+a float
+b float