You are viewing a plain text version of this content. The canonical link for it is here.
Posted to gitbox@hive.apache.org by GitBox <gi...@apache.org> on 2021/05/20 16:59:52 UTC

[GitHub] [hive] soumyakanti3578 commented on a change in pull request #2302: [HIVE-25090] Join condition parsing error in subquery

soumyakanti3578 commented on a change in pull request #2302:
URL: https://github.com/apache/hive/pull/2302#discussion_r636287623



##########
File path: ql/src/test/results/clientpositive/llap/subquery_corr_join.q.out
##########
@@ -0,0 +1,212 @@
+PREHOOK: query: create table alltypestiny(
+id int,
+int_col int,
+bigint_col bigint,
+bool_col boolean
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@alltypestiny
+POSTHOOK: query: create table alltypestiny(
+id int,
+int_col int,
+bigint_col bigint,
+bool_col boolean
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@alltypestiny
+PREHOOK: query: insert into alltypestiny(id, int_col, bigint_col, bool_col) values
+(1, 1, 10, true),
+(2, 4, 5, false),
+(3, 5, 15, true),
+(10, 10, 30, false)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@alltypestiny
+POSTHOOK: query: insert into alltypestiny(id, int_col, bigint_col, bool_col) values
+(1, 1, 10, true),
+(2, 4, 5, false),
+(3, 5, 15, true),
+(10, 10, 30, false)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@alltypestiny
+POSTHOOK: Lineage: alltypestiny.bigint_col SCRIPT []
+POSTHOOK: Lineage: alltypestiny.bool_col SCRIPT []
+POSTHOOK: Lineage: alltypestiny.id SCRIPT []
+POSTHOOK: Lineage: alltypestiny.int_col SCRIPT []
+PREHOOK: query: create table alltypesagg(
+id int,
+int_col int,
+bool_col boolean
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@alltypesagg
+POSTHOOK: query: create table alltypesagg(
+id int,
+int_col int,
+bool_col boolean
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@alltypesagg
+PREHOOK: query: insert into alltypesagg(id, int_col, bool_col) values
+(1, 1, true),
+(2, 4, false),
+(5, 6, true),
+(null, null, false)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@alltypesagg
+POSTHOOK: query: insert into alltypesagg(id, int_col, bool_col) values
+(1, 1, true),
+(2, 4, false),
+(5, 6, true),
+(null, null, false)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@alltypesagg
+POSTHOOK: Lineage: alltypesagg.bool_col SCRIPT []
+POSTHOOK: Lineage: alltypesagg.id SCRIPT []
+POSTHOOK: Lineage: alltypesagg.int_col SCRIPT []
+Warning: Shuffle Join MERGEJOIN[64][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+PREHOOK: query: explain cbo select *
+from alltypesagg t1
+where t1.id not in
+    (select tt1.id
+     from alltypestiny tt1 left JOIN alltypesagg tt2
+     on tt1.int_col = tt2.int_col)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesagg
+PREHOOK: Input: default@alltypestiny
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select *
+from alltypesagg t1
+where t1.id not in
+    (select tt1.id
+     from alltypestiny tt1 left JOIN alltypesagg tt2
+     on tt1.int_col = tt2.int_col)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesagg
+POSTHOOK: Input: default@alltypestiny
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(id=[$0], int_col=[$1], bool_col=[$2])
+  HiveFilter(condition=[OR(=($3, 0), AND(IS NULL($6), >=($4, $3), IS NOT NULL($0)))])
+    HiveProject(id=[$0], int_col=[$1], bool_col=[$2], c=[$5], ck=[$6], id0=[$3], literalTrue=[$4])
+      HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
+        HiveJoin(condition=[=($0, $3)], joinType=[left], algorithm=[none], cost=[not available])
+          HiveProject(id=[$0], int_col=[$1], bool_col=[$2])
+            HiveTableScan(table=[[default, alltypesagg]], table:alias=[t1])
+          HiveProject(id=[$0], literalTrue=[true])
+            HiveAggregate(group=[{0}])
+              HiveJoin(condition=[=($1, $2)], joinType=[left], algorithm=[none], cost=[not available])
+                HiveProject(id=[$0], int_col=[$1])
+                  HiveFilter(condition=[IS NOT NULL($0)])
+                    HiveTableScan(table=[[default, alltypestiny]], table:alias=[tt1])
+                HiveProject(int_col=[$1])
+                  HiveFilter(condition=[IS NOT NULL($1)])
+                    HiveTableScan(table=[[default, alltypesagg]], table:alias=[tt2])
+        HiveProject(c=[$0], ck=[$1])
+          HiveAggregate(group=[{}], c=[COUNT()], ck=[COUNT($0)])
+            HiveJoin(condition=[=($1, $2)], joinType=[left], algorithm=[none], cost=[not available])
+              HiveProject(id=[$0], int_col=[$1])
+                HiveTableScan(table=[[default, alltypestiny]], table:alias=[tt1])
+              HiveProject(int_col=[$1])
+                HiveFilter(condition=[IS NOT NULL($1)])
+                  HiveTableScan(table=[[default, alltypesagg]], table:alias=[tt2])
+
+Warning: Shuffle Join MERGEJOIN[64][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+PREHOOK: query: select *
+from alltypesagg t1
+where t1.id not in
+    (select tt1.id
+     from alltypestiny tt1 left JOIN alltypesagg tt2
+     on tt1.int_col = tt2.int_col)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesagg
+PREHOOK: Input: default@alltypestiny
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from alltypesagg t1
+where t1.id not in
+    (select tt1.id
+     from alltypestiny tt1 left JOIN alltypesagg tt2
+     on tt1.int_col = tt2.int_col)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesagg
+POSTHOOK: Input: default@alltypestiny
+#### A masked pattern was here ####
+5	6	true
+Warning: Shuffle Join MERGEJOIN[63][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 6' is a cross product
+Warning: Shuffle Join MERGEJOIN[65][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product
+PREHOOK: query: explain cbo select *
+from alltypesagg t1
+where t1.id not in
+    (select tt1.id
+     from alltypestiny tt1 left JOIN alltypesagg tt2
+     on tt1.int_col = t1.int_col)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesagg
+PREHOOK: Input: default@alltypestiny
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select *
+from alltypesagg t1
+where t1.id not in
+    (select tt1.id
+     from alltypestiny tt1 left JOIN alltypesagg tt2
+     on tt1.int_col = t1.int_col)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesagg
+POSTHOOK: Input: default@alltypestiny
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(id=[$0], int_col=[$1], bool_col=[$2])
+  HiveFilter(condition=[OR(IS NULL($4), =($4, 0), IS NOT TRUE(OR(IS NOT NULL($7), IS NULL($0), <($5, $4))))])
+    HiveJoin(condition=[AND(=($0, $6), =($8, $1))], joinType=[left], algorithm=[none], cost=[not available])
+      HiveJoin(condition=[=($3, $1)], joinType=[left], algorithm=[none], cost=[not available])
+        HiveProject(id=[$0], int_col=[$1], bool_col=[$2])
+          HiveTableScan(table=[[default, alltypesagg]], table:alias=[t1])
+        HiveProject(int_col=[$0], c=[$1], ck=[$2])
+          HiveAggregate(group=[{1}], c=[COUNT()], ck=[COUNT($0)])
+            HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available])
+              HiveProject(id=[$0], int_col=[$1])
+                HiveFilter(condition=[IS NOT NULL($1)])
+                  HiveTableScan(table=[[default, alltypestiny]], table:alias=[tt1])
+              HiveProject(DUMMY=[0])
+                HiveTableScan(table=[[default, alltypesagg]], table:alias=[tt2])
+      HiveProject(id=[$0], literalTrue=[true], int_col=[$1])
+        HiveAggregate(group=[{0, 1}])
+          HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available])
+            HiveProject(id=[$0], int_col=[$1])
+              HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($0))])
+                HiveTableScan(table=[[default, alltypestiny]], table:alias=[tt1])
+            HiveProject(DUMMY=[0])
+              HiveTableScan(table=[[default, alltypesagg]], table:alias=[tt2])
+
+Warning: Shuffle Join MERGEJOIN[63][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 6' is a cross product
+Warning: Shuffle Join MERGEJOIN[65][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product
+PREHOOK: query: select *
+from alltypesagg t1
+where t1.id not in
+    (select tt1.id
+     from alltypestiny tt1 left JOIN alltypesagg tt2
+     on tt1.int_col = t1.int_col)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesagg
+PREHOOK: Input: default@alltypestiny
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from alltypesagg t1
+where t1.id not in
+    (select tt1.id
+     from alltypestiny tt1 left JOIN alltypesagg tt2
+     on tt1.int_col = t1.int_col)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesagg
+POSTHOOK: Input: default@alltypestiny
+#### A masked pattern was here ####
+5	6	true
+NULL	NULL	false

Review comment:
       This row (`NULL NULL false`) should not be in the output. I checked the same query on Postgres, which does not return it, and the uncorrelated version of the query above does not output it either.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org
For additional commands, e-mail: gitbox-help@hive.apache.org