You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2017/02/28 20:03:11 UTC
hive git commit: HIVE-16040 : union column expansion should take
aliases from the leftmost branch (Sergey Shelukhin,
reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 95da916eb -> 928b4c015
HIVE-16040 : union column expansion should take aliases from the leftmost branch (Sergey Shelukhin, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/928b4c01
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/928b4c01
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/928b4c01
Branch: refs/heads/master
Commit: 928b4c01531470601a68950625bbe64faea95674
Parents: 95da916
Author: Sergey Shelukhin <se...@apache.org>
Authored: Tue Feb 28 11:59:29 2017 -0800
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Tue Feb 28 11:59:40 2017 -0800
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/parse/ParseUtils.java | 32 +-
.../queries/clientpositive/union_pos_alias.q | 20 +-
.../results/clientpositive/perf/query14.q.out | 4 +-
.../clientpositive/union_pos_alias.q.out | 307 +++++++++++++++++++
4 files changed, 356 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/928b4c01/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
index 473a664..54e37f7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
@@ -51,6 +51,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+import com.google.common.base.Preconditions;
+
/**
* Library of utility functions used in the parse code.
@@ -400,10 +402,9 @@ public final class ParseUtils {
}
}
}
- // We find the SELECT closest to the top. This assumes there's only one FROM or FROM-s
- // are all equivalent (union case). Also, this assumption could be false for an already
- // malformed query; we don't check for that here - it will fail later anyway.
- // TODO: Maybe we should find ALL the SELECT-s not nested in another from, and compare.
+ // Note: we assume that this isn't an already malformed query;
+ // we don't check for that here - it will fail later anyway.
+ // First, we find the SELECT closest to the top.
ASTNode select = searcher.simpleBreadthFirstSearchAny((ASTNode)fromNode,
HiveParser.TOK_SELECT, HiveParser.TOK_SELECTDI);
if (select == null) {
@@ -412,6 +413,29 @@ public final class ParseUtils {
setCols.token.setType(HiveParser.TOK_ALLCOLREF);
return;
}
+
+ // Then, find the leftmost logical sibling select, because that's what Hive uses for aliases.
+ while (true) {
+ CommonTree queryOfSelect = select.parent;
+ while (queryOfSelect != null && queryOfSelect.getType() != HiveParser.TOK_QUERY) {
+ queryOfSelect = queryOfSelect.parent;
+ }
+ // We should have some QUERY; and also its parent because by supposition we are in subq.
+ if (queryOfSelect == null || queryOfSelect.parent == null) {
+ LOG.debug("Replacing SETCOLREF with ALLCOLREF because we couldn't find the QUERY");
+ setCols.token.setType(HiveParser.TOK_ALLCOLREF);
+ return;
+ }
+ if (queryOfSelect.childIndex == 0) break; // We are the left-most child.
+ Tree moreToTheLeft = queryOfSelect.parent.getChild(0);
+ Preconditions.checkState(moreToTheLeft != queryOfSelect);
+ ASTNode newSelect = searcher.simpleBreadthFirstSearchAny((ASTNode)moreToTheLeft,
+ HiveParser.TOK_SELECT, HiveParser.TOK_SELECTDI);
+ Preconditions.checkState(newSelect != select);
+ select = newSelect;
+ // Repeat the procedure for the new select.
+ }
+
// Found the proper columns.
List<ASTNode> newChildren = new ArrayList<>(select.getChildCount());
HashSet<String> aliases = new HashSet<>();
http://git-wip-us.apache.org/repos/asf/hive/blob/928b4c01/ql/src/test/queries/clientpositive/union_pos_alias.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_pos_alias.q b/ql/src/test/queries/clientpositive/union_pos_alias.q
index c4eca68..60fea8f 100644
--- a/ql/src/test/queries/clientpositive/union_pos_alias.q
+++ b/ql/src/test/queries/clientpositive/union_pos_alias.q
@@ -27,4 +27,22 @@ UNION ALL
select 'test', value from src_10 s3
order by 2, 1 desc;
-drop table src_10;
\ No newline at end of file
+drop table src_10;
+
+
+drop view v;
+create view v as select key as k from src intersect all select key as k1 from src;
+desc formatted v;
+
+set hive.mapred.mode=nonstrict;
+set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest;
+
+create table masking_test as select cast(key as int) as key, value from src;
+
+explain
+select * from masking_test union all select * from masking_test ;
+select * from masking_test union all select * from masking_test ;
+
+explain
+select key as k1, value as v1 from masking_test where key > 0 intersect all select key as k2, value as v2 from masking_test where key > 0;
+select key as k1, value as v1 from masking_test where key > 0 intersect all select key as k2, value as v2 from masking_test where key > 0;
http://git-wip-us.apache.org/repos/asf/hive/blob/928b4c01/ql/src/test/results/clientpositive/perf/query14.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query14.q.out b/ql/src/test/results/clientpositive/perf/query14.q.out
index 55a2e5b..9821180 100644
--- a/ql/src/test/results/clientpositive/perf/query14.q.out
+++ b/ql/src/test/results/clientpositive/perf/query14.q.out
@@ -1,9 +1,9 @@
+Warning: Shuffle Join MERGEJOIN[916][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 114' is a cross product
+Warning: Shuffle Join MERGEJOIN[917][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 115' is a cross product
Warning: Shuffle Join MERGEJOIN[914][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 61' is a cross product
Warning: Shuffle Join MERGEJOIN[915][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 62' is a cross product
Warning: Shuffle Join MERGEJOIN[912][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product
Warning: Shuffle Join MERGEJOIN[913][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product
-Warning: Shuffle Join MERGEJOIN[916][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 114' is a cross product
-Warning: Shuffle Join MERGEJOIN[917][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 115' is a cross product
PREHOOK: query: explain
with cross_items as
(select i_item_sk ss_item_sk
http://git-wip-us.apache.org/repos/asf/hive/blob/928b4c01/ql/src/test/results/clientpositive/union_pos_alias.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/union_pos_alias.q.out b/ql/src/test/results/clientpositive/union_pos_alias.q.out
index 8eddbd9..8ffbc2a 100644
--- a/ql/src/test/results/clientpositive/union_pos_alias.q.out
+++ b/ql/src/test/results/clientpositive/union_pos_alias.q.out
@@ -306,3 +306,310 @@ POSTHOOK: query: drop table src_10
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@src_10
POSTHOOK: Output: default@src_10
+PREHOOK: query: drop view v
+PREHOOK: type: DROPVIEW
+POSTHOOK: query: drop view v
+POSTHOOK: type: DROPVIEW
+PREHOOK: query: create view v as select key as k from src intersect all select key as k1 from src
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@v
+POSTHOOK: query: create view v as select key as k from src intersect all select key as k1 from src
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@v
+POSTHOOK: Lineage: v.k SCRIPT [(src)src.null, (src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: desc formatted v
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@v
+POSTHOOK: query: desc formatted v
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@v
+# col_name data_type comment
+
+k string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+Table Type: VIRTUAL_VIEW
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: null
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+
+# View Information
+View Original Text: select key as k from src intersect all select key as k1 from src
+View Expanded Text: select `src`.`key` as `k` from `default`.`src` intersect all select `src`.`key` as `k1` from `default`.`src`
+View Rewrite Enabled: No
+PREHOOK: query: create table masking_test as select cast(key as int) as key, value from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@masking_test
+POSTHOOK: query: create table masking_test as select cast(key as int) as key, value from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@masking_test
+POSTHOOK: Lineage: masking_test.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: masking_test.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: explain
+select * from masking_test union all select * from masking_test
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from masking_test union all select * from masking_test
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((key % 2) = 0) and (key < 10)) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), reverse(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 166 Data size: 1762 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 166 Data size: 1762 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TableScan
+ alias: masking_test
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((key % 2) = 0) and (key < 10)) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), reverse(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 166 Data size: 1762 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 166 Data size: 1762 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from masking_test union all select * from masking_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+POSTHOOK: query: select * from masking_test union all select * from masking_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+0 0_lav
+0 0_lav
+4 4_lav
+4 4_lav
+8 8_lav
+8 8_lav
+0 0_lav
+0 0_lav
+0 0_lav
+0 0_lav
+2 2_lav
+2 2_lav
+PREHOOK: query: explain
+select key as k1, value as v1 from masking_test where key > 0 intersect all select key as k2, value as v2 from masking_test where key > 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key as k1, value as v1 from masking_test where key > 0 intersect all select key as k2, value as v2 from masking_test where key > 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1, Stage-3
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((key % 2) = 0) and (key < 10) and (key > 0)) (type: boolean)
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), reverse(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: int), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13 Data size: 137 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Union
+ Statistics: Num rows: 26 Data size: 274 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col2), count(_col2)
+ keys: _col0 (type: int), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 26 Data size: 274 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ Statistics: Num rows: 26 Data size: 274 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint), _col3 (type: bigint)
+ TableScan
+ Union
+ Statistics: Num rows: 26 Data size: 274 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col2), count(_col2)
+ keys: _col0 (type: int), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 26 Data size: 274 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ Statistics: Num rows: 26 Data size: 274 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint), _col3 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), count(VALUE._col1)
+ keys: KEY._col0 (type: int), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 137 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col3 = 2) (type: boolean)
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: bigint), _col0 (type: int), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ UDTF Operator
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ function name: UDTFReplicateRows
+ Select Operator
+ expressions: col1 (type: int), col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((key % 2) = 0) and (key < 10) and (key > 0)) (type: boolean)
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), reverse(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: int), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: int), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13 Data size: 137 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key as k1, value as v1 from masking_test where key > 0 intersect all select key as k2, value as v2 from masking_test where key > 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+POSTHOOK: query: select key as k1, value as v1 from masking_test where key > 0 intersect all select key as k2, value as v2 from masking_test where key > 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+2 2_lav
+4 4_lav
+8 8_lav