You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2016/11/21 18:54:25 UTC
[1/2] hive git commit: HIVE-15211: Provide support for complex
expressions in ON clauses for INNER joins (Jesus Camacho Rodriguez,
reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master d94ebe8bb -> 893b2553a
http://git-wip-us.apache.org/repos/asf/hive/blob/893b2553/ql/src/test/results/clientpositive/join45.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join45.q.out b/ql/src/test/results/clientpositive/join45.q.out
new file mode 100644
index 0000000..18a7876
--- /dev/null
+++ b/ql/src/test/results/clientpositive/join45.q.out
@@ -0,0 +1,1771 @@
+PREHOOK: query: -- Conjunction with pred on multiple inputs and single inputs
+EXPLAIN
+SELECT *
+FROM src1 JOIN src
+ON (src1.key=src.key
+ AND src1.value between 100 and 102
+ AND src.value between 100 and 102)
+LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Conjunction with pred on multiple inputs and single inputs
+EXPLAIN
+SELECT *
+FROM src1 JOIN src
+ON (src1.key=src.key
+ AND src1.value between 100 and 102
+ AND src.value between 100 and 102)
+LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col1 BETWEEN 100 AND 102 (type: boolean)
+ Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col1 BETWEEN 100 AND 102 (type: boolean)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT *
+FROM src1 JOIN src
+ON (src1.key=src.key
+ AND src1.value between 100 and 102
+ AND src.value between 100 and 102)
+LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM src1 JOIN src
+ON (src1.key=src.key
+ AND src1.value between 100 and 102
+ AND src.value between 100 and 102)
+LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+PREHOOK: query: -- Conjunction with pred on multiple inputs and none
+EXPLAIN
+SELECT *
+FROM src1 JOIN src
+ON (src1.key=src.key AND true)
+LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Conjunction with pred on multiple inputs and none
+EXPLAIN
+SELECT *
+FROM src1 JOIN src
+ON (src1.key=src.key AND true)
+LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT *
+FROM src1 JOIN src
+ON (src1.key=src.key AND true)
+LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM src1 JOIN src
+ON (src1.key=src.key AND true)
+LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+128 128 val_128
+128 128 val_128
+128 128 val_128
+146 val_146 146 val_146
+146 val_146 146 val_146
+150 val_150 150 val_150
+213 val_213 213 val_213
+213 val_213 213 val_213
+224 224 val_224
+224 224 val_224
+Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: -- Conjunction with pred on single inputs and none
+EXPLAIN
+SELECT *
+FROM src1 JOIN src
+ON (src1.value between 100 and 102
+ AND src.value between 100 and 102
+ AND true)
+LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Conjunction with pred on single inputs and none
+EXPLAIN
+SELECT *
+FROM src1 JOIN src
+ON (src1.value between 100 and 102
+ AND src.value between 100 and 102
+ AND true)
+LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col1 BETWEEN 100 AND 102 (type: boolean)
+ Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: _col1 BETWEEN 100 AND 102 (type: boolean)
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 110 Data size: 2103 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: SELECT *
+FROM src1 JOIN src
+ON (src1.value between 100 and 102
+ AND src.value between 100 and 102
+ AND true)
+LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM src1 JOIN src
+ON (src1.value between 100 and 102
+ AND src.value between 100 and 102
+ AND true)
+LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: -- Disjunction with pred on multiple inputs and single inputs
+EXPLAIN
+SELECT *
+FROM src1 JOIN src
+ON (src1.key=src.key
+ OR src1.value between 100 and 102
+ OR src.value between 100 and 102)
+LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Disjunction with pred on multiple inputs and single inputs
+EXPLAIN
+SELECT *
+FROM src1 JOIN src
+ON (src1.key=src.key
+ OR src1.value between 100 and 102
+ OR src.value between 100 and 102)
+LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((_col0 = _col2) or _col1 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102) (type: boolean)
+ Statistics: Num rows: 9026 Data size: 173876 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: SELECT *
+FROM src1 JOIN src
+ON (src1.key=src.key
+ OR src1.value between 100 and 102
+ OR src.value between 100 and 102)
+LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM src1 JOIN src
+ON (src1.key=src.key
+ OR src1.value between 100 and 102
+ OR src.value between 100 and 102)
+LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+406 val_406 406 val_406
+406 val_406 406 val_406
+406 val_406 406 val_406
+406 val_406 406 val_406
+146 val_146 146 val_146
+146 val_146 146 val_146
+213 val_213 213 val_213
+213 val_213 213 val_213
+128 128 val_128
+128 128 val_128
+Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: -- Conjunction with multiple inputs on one side
+EXPLAIN
+SELECT *
+FROM src1 JOIN src
+ON (src1.key+src.key >= 100
+ AND src1.key+src.key <= 102)
+LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Conjunction with multiple inputs on one side
+EXPLAIN
+SELECT *
+FROM src1 JOIN src
+ON (src1.key+src.key >= 100
+ AND src1.key+src.key <= 102)
+LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((UDFToDouble(_col0) + UDFToDouble(_col2)) <= 102.0) and ((UDFToDouble(_col0) + UDFToDouble(_col2)) >= 100.0)) (type: boolean)
+ Statistics: Num rows: 1388 Data size: 26738 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: SELECT *
+FROM src1 JOIN src
+ON (src1.key+src.key >= 100
+ AND src1.key+src.key <= 102)
+LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM src1 JOIN src
+ON (src1.key+src.key >= 100
+ AND src1.key+src.key <= 102)
+LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+66 val_66 35 val_35
+66 val_66 34 val_34
+66 val_66 35 val_35
+66 val_66 35 val_35
+98 val_98 2 val_2
+98 val_98 4 val_4
+Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: -- Disjunction with multiple inputs on one side
+EXPLAIN
+SELECT *
+FROM src1 JOIN src
+ON (src1.key+src.key >= 100
+ OR src1.key+src.key <= 102)
+LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Disjunction with multiple inputs on one side
+EXPLAIN
+SELECT *
+FROM src1 JOIN src
+ON (src1.key+src.key >= 100
+ OR src1.key+src.key <= 102)
+LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((UDFToDouble(_col0) + UDFToDouble(_col2)) >= 100.0) or ((UDFToDouble(_col0) + UDFToDouble(_col2)) <= 102.0)) (type: boolean)
+ Statistics: Num rows: 8332 Data size: 160507 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: SELECT *
+FROM src1 JOIN src
+ON (src1.key+src.key >= 100
+ OR src1.key+src.key <= 102)
+LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM src1 JOIN src
+ON (src1.key+src.key >= 100
+ OR src1.key+src.key <= 102)
+LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+406 val_406 97 val_97
+406 val_406 200 val_200
+406 val_406 400 val_400
+406 val_406 403 val_403
+406 val_406 169 val_169
+406 val_406 90 val_90
+406 val_406 126 val_126
+406 val_406 222 val_222
+406 val_406 477 val_477
+406 val_406 414 val_414
+Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: -- Function with multiple inputs on one side
+EXPLAIN
+SELECT *
+FROM src1 JOIN src
+ON ((src1.key,src.key) IN ((100,100),(101,101),(102,102)))
+LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Function with multiple inputs on one side
+EXPLAIN
+SELECT *
+FROM src1 JOIN src
+ON ((src1.key,src.key) IN ((100,100),(101,101),(102,102)))
+LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (struct(_col0,_col2)) IN (const struct(100,100), const struct(101,101), const struct(102,102)) (type: boolean)
+ Statistics: Num rows: 3125 Data size: 60200 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: SELECT *
+FROM src1 JOIN src
+ON ((src1.key,src.key) IN ((100,100),(101,101),(102,102)))
+LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM src1 JOIN src
+ON ((src1.key,src.key) IN ((100,100),(101,101),(102,102)))
+LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: -- Chained 1
+EXPLAIN
+SELECT *
+FROM src
+JOIN src1 a ON (a.key+src.key >= 100)
+LEFT OUTER JOIN src1 b ON (b.key = src.key)
+LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Chained 1
+EXPLAIN
+SELECT *
+FROM src
+JOIN src1 a ON (a.key+src.key >= 100)
+LEFT OUTER JOIN src1 b ON (b.key = src.key)
+LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: b
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ TableScan
+ alias: a
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 13750 Data size: 264875 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((UDFToDouble(_col4) + UDFToDouble(_col0)) >= 100.0) (type: boolean)
+ Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col2 (type: string), _col3 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT *
+FROM src
+JOIN src1 a ON (a.key+src.key >= 100)
+LEFT OUTER JOIN src1 b ON (b.key = src.key)
+LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM src
+JOIN src1 a ON (a.key+src.key >= 100)
+LEFT OUTER JOIN src1 b ON (b.key = src.key)
+LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+98 val_98 406 val_406 98 val_98
+98 val_98 146 val_146 98 val_98
+98 val_98 213 val_213 98 val_98
+98 val_98 128 98 val_98
+98 val_98 66 val_66 98 val_98
+98 val_98 369 98 val_98
+98 val_98 224 98 val_98
+98 val_98 273 val_273 98 val_98
+98 val_98 150 val_150 98 val_98
+98 val_98 401 val_401 98 val_98
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: -- Chained 2
+EXPLAIN
+SELECT *
+FROM src
+LEFT OUTER JOIN src1 a ON (a.key = src.key)
+JOIN src1 b ON (b.key+src.key<= 102)
+LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Chained 2
+EXPLAIN
+SELECT *
+FROM src
+LEFT OUTER JOIN src1 a ON (a.key = src.key)
+JOIN src1 b ON (b.key+src.key<= 102)
+LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: a
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Left Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ TableScan
+ alias: b
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 13750 Data size: 264875 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((UDFToDouble(_col4) + UDFToDouble(_col0)) <= 102.0) (type: boolean)
+ Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT *
+FROM src
+LEFT OUTER JOIN src1 a ON (a.key = src.key)
+JOIN src1 b ON (b.key+src.key<= 102)
+LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM src
+LEFT OUTER JOIN src1 a ON (a.key = src.key)
+JOIN src1 b ON (b.key+src.key<= 102)
+LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+9 val_9 NULL NULL 66 val_66
+8 val_8 NULL NULL 66 val_66
+5 val_5 NULL NULL 66 val_66
+5 val_5 NULL NULL 66 val_66
+5 val_5 NULL NULL 66 val_66
+4 val_4 NULL NULL 66 val_66
+4 val_4 NULL NULL 98 val_98
+35 val_35 NULL NULL 66 val_66
+35 val_35 NULL NULL 66 val_66
+35 val_35 NULL NULL 66 val_66
+Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: -- Chained 3
+EXPLAIN
+SELECT *
+FROM src
+JOIN src1 a ON (a.key+src.key >= 100)
+RIGHT OUTER JOIN src1 b ON (b.key = src.key)
+LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Chained 3
+EXPLAIN
+SELECT *
+FROM src
+JOIN src1 a ON (a.key+src.key >= 100)
+RIGHT OUTER JOIN src1 b ON (b.key = src.key)
+LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ TableScan
+ alias: a
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((UDFToDouble(_col2) + UDFToDouble(_col0)) >= 100.0) (type: boolean)
+ Statistics: Num rows: 4166 Data size: 80253 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 4166 Data size: 80253 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ TableScan
+ alias: b
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 4582 Data size: 88278 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: SELECT *
+FROM src
+JOIN src1 a ON (a.key+src.key >= 100)
+RIGHT OUTER JOIN src1 b ON (b.key = src.key)
+LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM src
+JOIN src1 a ON (a.key+src.key >= 100)
+RIGHT OUTER JOIN src1 b ON (b.key = src.key)
+LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+NULL NULL NULL NULL val_484
+NULL NULL NULL NULL
+NULL NULL NULL NULL
+NULL NULL NULL NULL
+NULL NULL NULL NULL val_27
+NULL NULL NULL NULL val_165
+NULL NULL NULL NULL val_409
+NULL NULL NULL NULL
+NULL NULL NULL NULL val_193
+NULL NULL NULL NULL val_265
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: -- Chained 4
+EXPLAIN
+SELECT *
+FROM src
+RIGHT OUTER JOIN src1 a ON (a.key = src.key)
+JOIN src1 b ON (b.key+src.key<= 102)
+LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Chained 4
+EXPLAIN
+SELECT *
+FROM src
+RIGHT OUTER JOIN src1 a ON (a.key = src.key)
+JOIN src1 b ON (b.key+src.key<= 102)
+LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: a
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ TableScan
+ alias: b
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 13750 Data size: 264875 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((UDFToDouble(_col4) + UDFToDouble(_col0)) <= 102.0) (type: boolean)
+ Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT *
+FROM src
+RIGHT OUTER JOIN src1 a ON (a.key = src.key)
+JOIN src1 b ON (b.key+src.key<= 102)
+LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM src
+RIGHT OUTER JOIN src1 a ON (a.key = src.key)
+JOIN src1 b ON (b.key+src.key<= 102)
+LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: -- Chained 5
+EXPLAIN
+SELECT *
+FROM src
+JOIN src1 a ON (a.key+src.key >= 100)
+FULL OUTER JOIN src1 b ON (b.key = src.key)
+LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Chained 5
+EXPLAIN
+SELECT *
+FROM src
+JOIN src1 a ON (a.key+src.key >= 100)
+FULL OUTER JOIN src1 b ON (b.key = src.key)
+LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ TableScan
+ alias: a
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((UDFToDouble(_col2) + UDFToDouble(_col0)) >= 100.0) (type: boolean)
+ Statistics: Num rows: 4166 Data size: 80253 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 4166 Data size: 80253 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ TableScan
+ alias: b
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 4582 Data size: 88278 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: SELECT *
+FROM src
+JOIN src1 a ON (a.key+src.key >= 100)
+FULL OUTER JOIN src1 b ON (b.key = src.key)
+LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM src
+JOIN src1 a ON (a.key+src.key >= 100)
+FULL OUTER JOIN src1 b ON (b.key = src.key)
+LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+NULL NULL NULL NULL val_484
+NULL NULL NULL NULL
+NULL NULL NULL NULL
+NULL NULL NULL NULL
+NULL NULL NULL NULL val_27
+NULL NULL NULL NULL val_165
+NULL NULL NULL NULL val_409
+NULL NULL NULL NULL
+NULL NULL NULL NULL val_193
+NULL NULL NULL NULL val_265
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: -- Chained 6
+EXPLAIN
+SELECT *
+FROM src
+FULL OUTER JOIN src1 a ON (a.key = src.key)
+JOIN src1 b ON (b.key+src.key<= 102)
+LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Chained 6
+EXPLAIN
+SELECT *
+FROM src
+FULL OUTER JOIN src1 a ON (a.key = src.key)
+JOIN src1 b ON (b.key+src.key<= 102)
+LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: a
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ TableScan
+ alias: b
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 13750 Data size: 264875 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((UDFToDouble(_col4) + UDFToDouble(_col0)) <= 102.0) (type: boolean)
+ Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product
+PREHOOK: query: SELECT *
+FROM src
+FULL OUTER JOIN src1 a ON (a.key = src.key)
+JOIN src1 b ON (b.key+src.key<= 102)
+LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM src
+FULL OUTER JOIN src1 a ON (a.key = src.key)
+JOIN src1 b ON (b.key+src.key<= 102)
+LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+9 val_9 NULL NULL 66 val_66
+8 val_8 NULL NULL 66 val_66
+5 val_5 NULL NULL 66 val_66
+5 val_5 NULL NULL 66 val_66
+5 val_5 NULL NULL 66 val_66
+4 val_4 NULL NULL 66 val_66
+4 val_4 NULL NULL 98 val_98
+35 val_35 NULL NULL 66 val_66
+35 val_35 NULL NULL 66 val_66
+35 val_35 NULL NULL 66 val_66
+Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: -- Right outer join with multiple inner joins and mixed conditions
+EXPLAIN
+SELECT *
+FROM cbo_t1 t1
+RIGHT OUTER JOIN cbo_t1 t2 ON (t2.key = t1.key)
+JOIN cbo_t1 t3 ON (t3.key = t2.key or t3.value = t2.value and t2.c_int = t1.c_int)
+JOIN cbo_t1 t4 ON (t4.key = t2.key or t2.c_float = t4.c_float and t4.value = t2.value)
+JOIN cbo_t1 t5 ON (t5.key = t2.key or t2.c_boolean = t4.c_boolean and t5.c_int = 42)
+LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Right outer join with multiple inner joins and mixed conditions
+EXPLAIN
+SELECT *
+FROM cbo_t1 t1
+RIGHT OUTER JOIN cbo_t1 t2 ON (t2.key = t1.key)
+JOIN cbo_t1 t3 ON (t3.key = t2.key or t3.value = t2.value and t2.c_int = t1.c_int)
+JOIN cbo_t1 t4 ON (t4.key = t2.key or t2.c_float = t4.c_float and t4.value = t2.value)
+JOIN cbo_t1 t5 ON (t5.key = t2.key or t2.c_boolean = t4.c_boolean and t5.c_int = 42)
+LIMIT 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-4 is a root stage
+ Stage-1 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string)
+ TableScan
+ alias: t2
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Right Outer Join0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 22 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t3
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string)
+ TableScan
+ alias: t5
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string)
+ TableScan
+ alias: t4
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string)
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 22 Data size: 288 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: float), _col10 (type: boolean), _col11 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ Inner Join 0 to 3
+ keys:
+ 0
+ 1
+ 2
+ 3
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29
+ Statistics: Num rows: 176000 Data size: 9396800 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((_col0 = _col24) or ((_col1 = _col25) and (_col26 = _col20))) and ((_col12 = _col24) or ((_col27 = _col15) and (_col13 = _col25))) and ((_col6 = _col24) or ((_col28 = _col16) and (_col8 = 42)))) (type: boolean)
+ Statistics: Num rows: 74250 Data size: 3964275 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col18 (type: string), _col19 (type: string), _col20 (type: int), _col21 (type: float), _col22 (type: boolean), _col23 (type: string), _col24 (type: string), _col25 (type: string), _col26 (type: int), _col27 (type: float), _col28 (type: boolean), _col29 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: float), _col16 (type: boolean), _col17 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: float), _col10 (type: boolean), _col11 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29
+ Statistics: Num rows: 74250 Data size: 3964275 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 530 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 530 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 10
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: SELECT *
+FROM cbo_t1 t1
+RIGHT OUTER JOIN cbo_t1 t2 ON (t2.key = t1.key)
+JOIN cbo_t1 t3 ON (t3.key = t2.key or t3.value = t2.value and t2.c_int = t1.c_int)
+JOIN cbo_t1 t4 ON (t4.key = t2.key or t2.c_float = t4.c_float and t4.value = t2.value)
+JOIN cbo_t1 t5 ON (t5.key = t2.key or t2.c_boolean = t4.c_boolean and t5.c_int = 42)
+LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cbo_t1
+PREHOOK: Input: default@cbo_t1@dt=2014
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM cbo_t1 t1
+RIGHT OUTER JOIN cbo_t1 t2 ON (t2.key = t1.key)
+JOIN cbo_t1 t3 ON (t3.key = t2.key or t3.value = t2.value and t2.c_int = t1.c_int)
+JOIN cbo_t1 t4 ON (t4.key = t2.key or t2.c_float = t4.c_float and t4.value = t2.value)
+JOIN cbo_t1 t5 ON (t5.key = t2.key or t2.c_boolean = t4.c_boolean and t5.c_int = 42)
+LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cbo_t1
+POSTHOOK: Input: default@cbo_t1@dt=2014
+#### A masked pattern was here ####
+ 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014
+ 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014
+ 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014
+ 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014
+ 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014
+ 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014
+ 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014
+ 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014
+ 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014
+ 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014 1 1 1 1.0 true 2014
[2/2] hive git commit: HIVE-15211: Provide support for complex
expressions in ON clauses for INNER joins (Jesus Camacho Rodriguez,
reviewed by Ashutosh Chauhan)
Posted by jc...@apache.org.
HIVE-15211: Provide support for complex expressions in ON clauses for INNER joins (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/893b2553
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/893b2553
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/893b2553
Branch: refs/heads/master
Commit: 893b2553a8f79e600dd6f1dbee48fbbe0b40b58b
Parents: d94ebe8
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Tue Nov 15 21:48:08 2016 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Mon Nov 21 18:51:58 2016 +0000
----------------------------------------------------------------------
.../org/apache/hadoop/hive/ql/ErrorMsg.java | 8 +-
.../JoinCondTypeCheckProcFactory.java | 116 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 5 +-
.../apache/hadoop/hive/ql/parse/QBJoinTree.java | 2 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 78 +-
ql/src/test/queries/clientnegative/join45.q | 13 +
ql/src/test/queries/clientpositive/join45.q | 203 ++
ql/src/test/results/clientnegative/join45.q.out | 13 +
ql/src/test/results/clientpositive/join45.q.out | 1771 ++++++++++++++++++
9 files changed, 2057 insertions(+), 152 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/893b2553/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 97fcd55..b62df35 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -27,7 +27,6 @@ import java.util.regex.Pattern;
import org.antlr.runtime.tree.Tree;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.ASTNodeOrigin;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
/**
* List of all error messages.
@@ -70,9 +69,9 @@ public enum ErrorMsg {
INVALID_ARGUMENT(10014, "Wrong arguments"),
INVALID_ARGUMENT_LENGTH(10015, "Arguments length mismatch", "21000"),
INVALID_ARGUMENT_TYPE(10016, "Argument type mismatch"),
- INVALID_JOIN_CONDITION_1(10017, "Both left and right aliases encountered in JOIN"),
- INVALID_JOIN_CONDITION_2(10018, "Neither left nor right aliases encountered in JOIN"),
- INVALID_JOIN_CONDITION_3(10019, "OR not supported in JOIN currently"),
+ @Deprecated INVALID_JOIN_CONDITION_1(10017, "Both left and right aliases encountered in JOIN"),
+ @Deprecated INVALID_JOIN_CONDITION_2(10018, "Neither left nor right aliases encountered in JOIN"),
+ @Deprecated INVALID_JOIN_CONDITION_3(10019, "OR not supported in JOIN currently"),
INVALID_TRANSFORM(10020, "TRANSFORM with other SELECT columns not supported"),
UNSUPPORTED_MULTIPLE_DISTINCTS(10022, "DISTINCT on different columns not supported" +
" with skew in data"),
@@ -460,6 +459,7 @@ public enum ErrorMsg {
"requires \"AND <boolean>\" on the 1st WHEN MATCHED clause of <{0}>", true),
MERGE_TOO_MANY_DELETE(10405, "MERGE statment can have at most 1 WHEN MATCHED ... DELETE clause: <{0}>", true),
MERGE_TOO_MANY_UPDATE(10406, "MERGE statment can have at most 1 WHEN MATCHED ... UPDATE clause: <{0}>", true),
+ INVALID_JOIN_CONDITION(10407, "Complex condition not supported for (LEFT|RIGHT|FULL) OUTER JOIN"),
//========================== 20000 range starts here ========================//
SCRIPT_INIT_ERROR(20000, "Unable to initialize custom script."),
SCRIPT_IO_ERROR(20001, "An error occurred while reading or writing to your custom script. "
http://git-wip-us.apache.org/repos/asf/hive/blob/893b2553/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinCondTypeCheckProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinCondTypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinCondTypeCheckProcFactory.java
index 9128d81..cf665ee 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinCondTypeCheckProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinCondTypeCheckProcFactory.java
@@ -18,17 +18,12 @@
package org.apache.hadoop.hive.ql.optimizer.calcite.translator;
import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.HashSet;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import java.util.Stack;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.FunctionInfo;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.parse.ASTNode;
@@ -41,11 +36,6 @@ import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
/**
* JoinCondTypeCheckProcFactory is used by Calcite planner(CBO) to generate Join Conditions from Join Condition AST.
@@ -117,7 +107,7 @@ public class JoinCondTypeCheckProcFactory extends TypeCheckProcFactory {
}
if (tblAliasCnt > 1) {
- throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr));
+ throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_OR_COLUMN.getMsg(expr));
}
return (tblAliasCnt == 1) ? true : false;
@@ -132,7 +122,7 @@ public class JoinCondTypeCheckProcFactory extends TypeCheckProcFactory {
tmp = rr.get(tabName, colAlias);
if (tmp != null) {
if (cInfoToRet != null) {
- throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr));
+ throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_OR_COLUMN.getMsg(expr));
}
cInfoToRet = tmp;
}
@@ -194,7 +184,7 @@ public class JoinCondTypeCheckProcFactory extends TypeCheckProcFactory {
tmp = rr.get(tabName, colAlias);
if (tmp != null) {
if (cInfoToRet != null) {
- throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr));
+ throw new SemanticException(ErrorMsg.AMBIGUOUS_TABLE_OR_COLUMN.getMsg(expr));
}
cInfoToRet = tmp;
}
@@ -202,106 +192,6 @@ public class JoinCondTypeCheckProcFactory extends TypeCheckProcFactory {
return cInfoToRet;
}
-
- @Override
- protected void validateUDF(ASTNode expr, boolean isFunction, TypeCheckCtx ctx, FunctionInfo fi,
- List<ExprNodeDesc> children, GenericUDF genericUDF) throws SemanticException {
- super.validateUDF(expr, isFunction, ctx, fi, children, genericUDF);
-
- JoinTypeCheckCtx jCtx = (JoinTypeCheckCtx) ctx;
-
- // Join Condition can not contain disjunctions
- if (genericUDF instanceof GenericUDFOPOr) {
- throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_3.getMsg(expr));
- }
-
- // Non Conjunctive elements have further limitations in Join conditions
- if (!(genericUDF instanceof GenericUDFOPAnd)) {
- // Non Comparison UDF other than 'and' can not use inputs from both side
- if (!(genericUDF instanceof GenericUDFBaseCompare)) {
- if (genericUDFargsRefersToBothInput(genericUDF, children, jCtx.getInputRRList())) {
- throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr));
- }
- } else if (genericUDF instanceof GenericUDFBaseCompare) {
- // Comparisons of non literals LHS/RHS can not refer to inputs from
- // both sides
- if (children.size() == 2 && !(children.get(0) instanceof ExprNodeConstantDesc)
- && !(children.get(1) instanceof ExprNodeConstantDesc)) {
- if (comparisonUDFargsRefersToBothInput((GenericUDFBaseCompare) genericUDF, children,
- jCtx.getInputRRList())) {
- throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1.getMsg(expr));
- }
- }
- }
- }
- }
-
- private static boolean genericUDFargsRefersToBothInput(GenericUDF udf,
- List<ExprNodeDesc> children, List<RowResolver> inputRRList) {
- boolean argsRefersToBothInput = false;
-
- Map<Integer, ExprNodeDesc> hasCodeToColDescMap = new HashMap<Integer, ExprNodeDesc>();
- for (ExprNodeDesc child : children) {
- ExprNodeDescUtils.getExprNodeColumnDesc(child, hasCodeToColDescMap);
- }
- Set<Integer> inputRef = getInputRef(hasCodeToColDescMap.values(), inputRRList);
-
- if (inputRef.size() > 1)
- argsRefersToBothInput = true;
-
- return argsRefersToBothInput;
- }
-
- private static boolean comparisonUDFargsRefersToBothInput(GenericUDFBaseCompare comparisonUDF,
- List<ExprNodeDesc> children, List<RowResolver> inputRRList) {
- boolean argsRefersToBothInput = false;
-
- Map<Integer, ExprNodeDesc> lhsHashCodeToColDescMap = new HashMap<Integer, ExprNodeDesc>();
- Map<Integer, ExprNodeDesc> rhsHashCodeToColDescMap = new HashMap<Integer, ExprNodeDesc>();
- ExprNodeDescUtils.getExprNodeColumnDesc(children.get(0), lhsHashCodeToColDescMap);
- ExprNodeDescUtils.getExprNodeColumnDesc(children.get(1), rhsHashCodeToColDescMap);
- Set<Integer> lhsInputRef = getInputRef(lhsHashCodeToColDescMap.values(), inputRRList);
- Set<Integer> rhsInputRef = getInputRef(rhsHashCodeToColDescMap.values(), inputRRList);
-
- if (lhsInputRef.size() > 1 || rhsInputRef.size() > 1)
- argsRefersToBothInput = true;
-
- return argsRefersToBothInput;
- }
-
- private static Set<Integer> getInputRef(Collection<ExprNodeDesc> colDescSet,
- List<RowResolver> inputRRList) {
- String tableAlias;
- RowResolver inputRR;
- Set<Integer> inputLineage = new HashSet<Integer>();
-
- for (ExprNodeDesc col : colDescSet) {
- ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) col;
- tableAlias = colDesc.getTabAlias();
-
- for (int i = 0; i < inputRRList.size(); i++) {
- inputRR = inputRRList.get(i);
-
- // If table Alias is present check if InputRR has that table and then
- // check for internal name
- // else if table alias is null then check with internal name in all
- // inputRR.
- if (tableAlias != null) {
- if (inputRR.hasTableAlias(tableAlias)) {
- if (inputRR.doesInvRslvMapContain(colDesc.getColumn())) {
- inputLineage.add(i);
- }
- }
- } else {
- if (inputRR.doesInvRslvMapContain(colDesc.getColumn())) {
- inputLineage.add(i);
- }
- }
- }
- }
-
- return inputLineage;
- }
}
/**
http://git-wip-us.apache.org/repos/asf/hive/blob/893b2553/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 78011c2..6965f8f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1543,12 +1543,11 @@ public class CalcitePlanner extends SemanticAnalyzer {
JoinTypeCheckCtx jCtx = new JoinTypeCheckCtx(leftRR, rightRR, hiveJoinType);
Map<ASTNode, ExprNodeDesc> exprNodes = JoinCondTypeCheckProcFactory.genExprNode(joinCond,
jCtx);
- if (jCtx.getError() != null)
+ if (jCtx.getError() != null) {
throw new SemanticException(SemanticAnalyzer.generateErrorMessage(jCtx.getErrorSrcNode(),
jCtx.getError()));
-
+ }
ExprNodeDesc joinCondnExprNode = exprNodes.get(joinCond);
-
List<RelNode> inputRels = new ArrayList<RelNode>();
inputRels.add(leftRel);
inputRels.add(rightRel);
http://git-wip-us.apache.org/repos/asf/hive/blob/893b2553/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java
index a3e95ce..ec76fb7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBJoinTree.java
@@ -420,7 +420,7 @@ public class QBJoinTree implements Serializable, Cloneable {
// clone postJoinFilters
for (ASTNode filter : postJoinFilters) {
- cloned.getPostJoinFilters().add(filter);
+ cloned.addPostJoinFilter(filter);
}
// clone rhsSemijoin
for (Entry<String, ArrayList<ASTNode>> entry : rhsSemijoin.entrySet()) {
http://git-wip-us.apache.org/repos/asf/hive/blob/893b2553/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 7d8b2bd..3bc6fe4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -65,7 +65,6 @@ import org.apache.hadoop.hive.conf.HiveConf.StrictChecks;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.Warehouse;
-import org.apache.hadoop.hive.metastore.api.AddDynamicPartitions;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
@@ -2581,8 +2580,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
List<String> rightAliases, ASTNode condn, QBJoinTree joinTree,
List<String> leftSrc) throws SemanticException {
if ((leftAliases.size() != 0) && (rightAliases.size() != 0)) {
- throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1
- .getMsg(condn));
+ joinTree.addPostJoinFilter(condn);
+ return;
}
if (rightAliases.size() != 0) {
@@ -2596,8 +2595,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
}
}
} else {
- throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_2
- .getMsg(condn));
+ joinTree.addPostJoinFilter(condn);
}
}
@@ -2791,8 +2789,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
switch (joinCond.getToken().getType()) {
case HiveParser.KW_OR:
- throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_3
- .getMsg(joinCond));
+ joinTree.addPostJoinFilter(joinCond);
+ break;
case HiveParser.KW_AND:
parseJoinCondition(joinTree, (ASTNode) joinCond.getChild(0), leftSrc, type, aliasToOpInfo);
@@ -2821,15 +2819,13 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
// * join is right outer and filter is on right alias
if (((leftCondAl1.size() != 0) && (leftCondAl2.size() != 0))
|| ((rightCondAl1.size() != 0) && (rightCondAl2.size() != 0))) {
- throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1
- .getMsg(joinCond));
+ joinTree.addPostJoinFilter(joinCond);
+ } else {
+ applyEqualityPredicateToQBJoinTree(joinTree, type, leftSrc,
+ joinCond, leftCondn, rightCondn,
+ leftCondAl1, leftCondAl2,
+ rightCondAl1, rightCondAl2);
}
-
- applyEqualityPredicateToQBJoinTree(joinTree, type, leftSrc,
- joinCond, leftCondn, rightCondn,
- leftCondAl1, leftCondAl2,
- rightCondAl1, rightCondAl2);
-
break;
default:
@@ -2871,23 +2867,22 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
}
if (!leftAliasNull && !rightAliasNull) {
- throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION_1
- .getMsg(joinCond));
- }
-
- if (!leftAliasNull) {
- if (type.equals(JoinType.LEFTOUTER)
- || type.equals(JoinType.FULLOUTER)) {
- joinTree.getFilters().get(0).add(joinCond);
- } else {
- joinTree.getFiltersForPushing().get(0).add(joinCond);
- }
+ joinTree.addPostJoinFilter(joinCond);
} else {
- if (type.equals(JoinType.RIGHTOUTER)
- || type.equals(JoinType.FULLOUTER)) {
- joinTree.getFilters().get(1).add(joinCond);
+ if (!leftAliasNull) {
+ if (type.equals(JoinType.LEFTOUTER)
+ || type.equals(JoinType.FULLOUTER)) {
+ joinTree.getFilters().get(0).add(joinCond);
+ } else {
+ joinTree.getFiltersForPushing().get(0).add(joinCond);
+ }
} else {
- joinTree.getFiltersForPushing().get(1).add(joinCond);
+ if (type.equals(JoinType.RIGHTOUTER)
+ || type.equals(JoinType.FULLOUTER)) {
+ joinTree.getFilters().get(1).add(joinCond);
+ } else {
+ joinTree.getFiltersForPushing().get(1).add(joinCond);
+ }
}
}
@@ -8107,9 +8102,16 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
joinOp.getConf().setQBJoinTreeProps(joinTree);
joinContext.put(joinOp, joinTree);
+ // Safety check for postconditions; currently we do not support them for outer join
+ if (joinTree.getPostJoinFilters().size() != 0 && !joinTree.getNoOuterJoin()) {
+ throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION.getMsg());
+ }
Operator op = joinOp;
- for(ASTNode condn : joinTree.getPostJoinFilters() ) {
+ for(ASTNode condn : joinTree.getPostJoinFilters()) {
op = genFilterPlan(qb, condn, op, false);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Generated " + op + " with post-filtering conditions after JOIN operator");
+ }
}
return op;
}
@@ -8928,6 +8930,15 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
}
target.setMapAliases(mapAliases);
}
+
+ if (node.getPostJoinFilters().size() != 0) {
+ // Safety check: if we are merging join operators and there are post-filtering
+ // conditions, they cannot be outer joins
+ assert node.getNoOuterJoin() && target.getNoOuterJoin();
+ for (ASTNode exprPostFilter : node.getPostJoinFilters()) {
+ target.addPostJoinFilter(exprPostFilter);
+ }
+ }
}
private ObjectPair<Integer, int[]> findMergePos(QBJoinTree node, QBJoinTree target) {
@@ -9035,6 +9046,11 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
if (prevType != null && prevType != currType) {
break;
}
+ if ((!node.getNoOuterJoin() && node.getPostJoinFilters().size() != 0) ||
+ (!target.getNoOuterJoin() && target.getPostJoinFilters().size() != 0)) {
+ // Outer joins with post-filtering conditions cannot be merged
+ break;
+ }
ObjectPair<Integer, int[]> mergeDetails = findMergePos(node, target);
int pos = mergeDetails.getFirst();
if (pos >= 0) {
http://git-wip-us.apache.org/repos/asf/hive/blob/893b2553/ql/src/test/queries/clientnegative/join45.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/join45.q b/ql/src/test/queries/clientnegative/join45.q
new file mode 100644
index 0000000..4e8db96
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/join45.q
@@ -0,0 +1,13 @@
+set hive.strict.checks.cartesian.product=false;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE mytable(val1 INT, val2 INT, val3 INT);
+
+-- Outer join with complex pred: not supported
+EXPLAIN
+SELECT *
+FROM mytable src1 LEFT OUTER JOIN mytable src2
+ON (src1.val1+src2.val1>= 2450816
+ AND src1.val1+src2.val1<= 2451500);
+
http://git-wip-us.apache.org/repos/asf/hive/blob/893b2553/ql/src/test/queries/clientpositive/join45.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/join45.q b/ql/src/test/queries/clientpositive/join45.q
new file mode 100644
index 0000000..54e422d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/join45.q
@@ -0,0 +1,203 @@
+set hive.strict.checks.cartesian.product=false;
+
+-- Conjunction with pred on multiple inputs and single inputs
+EXPLAIN
+SELECT *
+FROM src1 JOIN src
+ON (src1.key=src.key
+ AND src1.value between 100 and 102
+ AND src.value between 100 and 102)
+LIMIT 10;
+
+SELECT *
+FROM src1 JOIN src
+ON (src1.key=src.key
+ AND src1.value between 100 and 102
+ AND src.value between 100 and 102)
+LIMIT 10;
+
+-- Conjunction with pred on multiple inputs and none
+EXPLAIN
+SELECT *
+FROM src1 JOIN src
+ON (src1.key=src.key AND true)
+LIMIT 10;
+
+SELECT *
+FROM src1 JOIN src
+ON (src1.key=src.key AND true)
+LIMIT 10;
+
+-- Conjunction with pred on single inputs and none
+EXPLAIN
+SELECT *
+FROM src1 JOIN src
+ON (src1.value between 100 and 102
+ AND src.value between 100 and 102
+ AND true)
+LIMIT 10;
+
+SELECT *
+FROM src1 JOIN src
+ON (src1.value between 100 and 102
+ AND src.value between 100 and 102
+ AND true)
+LIMIT 10;
+
+-- Disjunction with pred on multiple inputs and single inputs
+EXPLAIN
+SELECT *
+FROM src1 JOIN src
+ON (src1.key=src.key
+ OR src1.value between 100 and 102
+ OR src.value between 100 and 102)
+LIMIT 10;
+
+SELECT *
+FROM src1 JOIN src
+ON (src1.key=src.key
+ OR src1.value between 100 and 102
+ OR src.value between 100 and 102)
+LIMIT 10;
+
+-- Conjunction with multiple inputs on one side
+EXPLAIN
+SELECT *
+FROM src1 JOIN src
+ON (src1.key+src.key >= 100
+ AND src1.key+src.key <= 102)
+LIMIT 10;
+
+SELECT *
+FROM src1 JOIN src
+ON (src1.key+src.key >= 100
+ AND src1.key+src.key <= 102)
+LIMIT 10;
+
+-- Disjunction with multiple inputs on one side
+EXPLAIN
+SELECT *
+FROM src1 JOIN src
+ON (src1.key+src.key >= 100
+ OR src1.key+src.key <= 102)
+LIMIT 10;
+
+SELECT *
+FROM src1 JOIN src
+ON (src1.key+src.key >= 100
+ OR src1.key+src.key <= 102)
+LIMIT 10;
+
+-- Function with multiple inputs on one side
+EXPLAIN
+SELECT *
+FROM src1 JOIN src
+ON ((src1.key,src.key) IN ((100,100),(101,101),(102,102)))
+LIMIT 10;
+
+SELECT *
+FROM src1 JOIN src
+ON ((src1.key,src.key) IN ((100,100),(101,101),(102,102)))
+LIMIT 10;
+
+-- Chained 1
+EXPLAIN
+SELECT *
+FROM src
+JOIN src1 a ON (a.key+src.key >= 100)
+LEFT OUTER JOIN src1 b ON (b.key = src.key)
+LIMIT 10;
+
+SELECT *
+FROM src
+JOIN src1 a ON (a.key+src.key >= 100)
+LEFT OUTER JOIN src1 b ON (b.key = src.key)
+LIMIT 10;
+
+-- Chained 2
+EXPLAIN
+SELECT *
+FROM src
+LEFT OUTER JOIN src1 a ON (a.key = src.key)
+JOIN src1 b ON (b.key+src.key<= 102)
+LIMIT 10;
+
+SELECT *
+FROM src
+LEFT OUTER JOIN src1 a ON (a.key = src.key)
+JOIN src1 b ON (b.key+src.key<= 102)
+LIMIT 10;
+
+-- Chained 3
+EXPLAIN
+SELECT *
+FROM src
+JOIN src1 a ON (a.key+src.key >= 100)
+RIGHT OUTER JOIN src1 b ON (b.key = src.key)
+LIMIT 10;
+
+SELECT *
+FROM src
+JOIN src1 a ON (a.key+src.key >= 100)
+RIGHT OUTER JOIN src1 b ON (b.key = src.key)
+LIMIT 10;
+
+-- Chained 4
+EXPLAIN
+SELECT *
+FROM src
+RIGHT OUTER JOIN src1 a ON (a.key = src.key)
+JOIN src1 b ON (b.key+src.key<= 102)
+LIMIT 10;
+
+SELECT *
+FROM src
+RIGHT OUTER JOIN src1 a ON (a.key = src.key)
+JOIN src1 b ON (b.key+src.key<= 102)
+LIMIT 10;
+
+-- Chained 5
+EXPLAIN
+SELECT *
+FROM src
+JOIN src1 a ON (a.key+src.key >= 100)
+FULL OUTER JOIN src1 b ON (b.key = src.key)
+LIMIT 10;
+
+SELECT *
+FROM src
+JOIN src1 a ON (a.key+src.key >= 100)
+FULL OUTER JOIN src1 b ON (b.key = src.key)
+LIMIT 10;
+
+-- Chained 6
+EXPLAIN
+SELECT *
+FROM src
+FULL OUTER JOIN src1 a ON (a.key = src.key)
+JOIN src1 b ON (b.key+src.key<= 102)
+LIMIT 10;
+
+SELECT *
+FROM src
+FULL OUTER JOIN src1 a ON (a.key = src.key)
+JOIN src1 b ON (b.key+src.key<= 102)
+LIMIT 10;
+
+-- Right outer join with multiple inner joins and mixed conditions
+EXPLAIN
+SELECT *
+FROM cbo_t1 t1
+RIGHT OUTER JOIN cbo_t1 t2 ON (t2.key = t1.key)
+JOIN cbo_t1 t3 ON (t3.key = t2.key or t3.value = t2.value and t2.c_int = t1.c_int)
+JOIN cbo_t1 t4 ON (t4.key = t2.key or t2.c_float = t4.c_float and t4.value = t2.value)
+JOIN cbo_t1 t5 ON (t5.key = t2.key or t2.c_boolean = t4.c_boolean and t5.c_int = 42)
+LIMIT 10;
+
+SELECT *
+FROM cbo_t1 t1
+RIGHT OUTER JOIN cbo_t1 t2 ON (t2.key = t1.key)
+JOIN cbo_t1 t3 ON (t3.key = t2.key or t3.value = t2.value and t2.c_int = t1.c_int)
+JOIN cbo_t1 t4 ON (t4.key = t2.key or t2.c_float = t4.c_float and t4.value = t2.value)
+JOIN cbo_t1 t5 ON (t5.key = t2.key or t2.c_boolean = t4.c_boolean and t5.c_int = 42)
+LIMIT 10;
http://git-wip-us.apache.org/repos/asf/hive/blob/893b2553/ql/src/test/results/clientnegative/join45.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/join45.q.out b/ql/src/test/results/clientnegative/join45.q.out
new file mode 100644
index 0000000..87ef769
--- /dev/null
+++ b/ql/src/test/results/clientnegative/join45.q.out
@@ -0,0 +1,13 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE mytable(val1 INT, val2 INT, val3 INT)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mytable
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE mytable(val1 INT, val2 INT, val3 INT)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mytable
+FAILED: SemanticException [Error 10407]: Complex condition not supported for (LEFT|RIGHT|FULL) OUTER JOIN