You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hive.apache.org by "Ryu Kobayashi (Jira)" <ji...@apache.org> on 2023/05/10 09:30:00 UTC

[jira] [Updated] (HIVE-27088) Using MergeJoin and using filters does not work

     [ https://issues.apache.org/jira/browse/HIVE-27088?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Ryu Kobayashi updated HIVE-27088:
---------------------------------
    Description: 
When hive.merge.nway.joins is set to `true` and JOIN is performed with INNER and OUTER and a filter exists, normal results cannot be obtained.

For example:
{code:java}
-- Data preparation
create temporary table foo (id bigint, code string) stored as orc;
create temporary table bar (id bigint, code string) stored as orc;
create temporary table baz (id bigint) stored as orc;
INSERT INTO foo values
  (29999000052073, '01'),
  (29999000052107, '01'),
  (29999000052111, '01'),
  (29999000052112, '01'),
  (29999000052113, '01'),
  (29999000052114, '01'),
  (29999000052071, '01A'),
  (29999000052072, '01A'),
  (29999000052116, '01A'),
  (29999000052117, '01A'),
  (29999000052118, '01A'),
  (29999000052119, '01A'),
  (29999000052120, '01A'),
  (29999000052076, '06'),
  (29999000052074, '06A'),
  (29999000052075, '06A');INSERT INTO bar values
  (29999000052071, '01'),
  (29999000052072, '01'),
  (29999000052073, '01'),
  (29999000052116, '01'),
  (29999000052117, '01'),
  (29999000052071, '01A'),
  (29999000052072, '01A'),
  (29999000052073, '01A'),
  (29999000052116, '01AS'),
  (29999000052117, '01AS'),
  (29999000052071, '01B'),
  (29999000052072, '01B'),
  (29999000052073, '01B'),
  (29999000052116, '01BS'),
  (29999000052117, '01BS'),
  (29999000052071, '01C'),
  (29999000052072, '01C'),
  (29999000052073, '01C7'),
  (29999000052116, '01CS'),
  (29999000052117, '01CS'),
  (29999000052071, '01D'),
  (29999000052072, '01D'),
  (29999000052073, '01D'),
  (29999000052116, '01DS'),
  (29999000052117, '01DS');INSERT INTO baz values
  (29999000052071),
  (29999000052072),
  (29999000052073),
  (29999000052074),
  (29999000052075),
  (29999000052076),
  (29999000052107),
  (29999000052111),
  (29999000052112),
  (29999000052113),
  (29999000052114),
  (29999000052116),
  (29999000052117),
  (29999000052118),
  (29999000052119),
  (29999000052120);{code}
Normal works(set hive.merge.nway.joins=false):
{code:java}
hive> set hive.merge.nway.joins=false;
hive> SELECT
  a.id,
  b.code,
  c.id
FROM bar AS a
INNER JOIN foo AS b
ON a.id = b.id
  AND (a.code = '01AS' OR b.code = '01BS')
LEFT OUTER JOIN baz AS c
ON a.id = c.id;

OK
29999000052116  01A     29999000052116
29999000052117  01A     29999000052117 {code}
Abnormal works(set hive.merge.nway.joins=true):
{code:java}
hive> set hive.merge.nway.joins=true;
hive> SELECT
  a.id,
  b.code,
  c.id
FROM bar AS a
INNER JOIN foo AS b
ON a.id = b.id
  AND (a.code = '01AS' OR b.code = '01BS')
LEFT OUTER JOIN baz AS c
ON a.id = c.id;

OK 29999000052071  01A     NULL
29999000052072  01A     NULL
29999000052073  01      NULL
29999000052116  01A     NULL
29999000052117  01A     NULL
29999000052071  01A     NULL
29999000052072  01A     NULL
29999000052073  01      NULL
29999000052116  01A     29999000052116
29999000052117  01A     29999000052117
29999000052071  01A     NULL
29999000052072  01A     NULL
29999000052073  01      NULL
29999000052116  01A     NULL
29999000052117  01A     NULL
29999000052071  01A     NULL
29999000052072  01A     NULL
29999000052073  01      NULL
29999000052116  01A     NULL
29999000052117  01A     NULL
29999000052071  01A     NULL
29999000052072  01A     NULL
29999000052073  01      NULL
29999000052116  01A     NULL
29999000052117  01A     NULL   {code}
 

I think this is also related to the next ticket: https://issues.apache.org/jira/browse/HIVE-21322

  was:
When hive.merge.nway.joins is set to `true` and JOIN is performed with INNER and OUTER and a filter exists, normal results cannot be obtained.

For example:
{code:java}
-- Data preparation
create temporary table foo (id bigint, code string) stored as orc;
create temporary table bar (id bigint, code string) stored as orc;
create temporary table baz (id bigint) stored as orc;
INSERT INTO foo values
  (29999000052073, '01'),
  (29999000052107, '01'),
  (29999000052111, '01'),
  (29999000052112, '01'),
  (29999000052113, '01'),
  (29999000052114, '01'),
  (29999000052071, '01A'),
  (29999000052072, '01A'),
  (29999000052116, '01A'),
  (29999000052117, '01A'),
  (29999000052118, '01A'),
  (29999000052119, '01A'),
  (29999000052120, '01A'),
  (29999000052076, '06'),
  (29999000052074, '06A'),
  (29999000052075, '06A');INSERT INTO bar values
  (29999000052071, '01'),
  (29999000052072, '01'),
  (29999000052073, '01'),
  (29999000052116, '01'),
  (29999000052117, '01'),
  (29999000052071, '01A'),
  (29999000052072, '01A'),
  (29999000052073, '01A'),
  (29999000052116, '01AS'),
  (29999000052117, '01AS'),
  (29999000052071, '01B'),
  (29999000052072, '01B'),
  (29999000052073, '01B'),
  (29999000052116, '01BS'),
  (29999000052117, '01BS'),
  (29999000052071, '01C'),
  (29999000052072, '01C'),
  (29999000052073, '01C7'),
  (29999000052116, '01CS'),
  (29999000052117, '01CS'),
  (29999000052071, '01D'),
  (29999000052072, '01D'),
  (29999000052073, '01D'),
  (29999000052116, '01DS'),
  (29999000052117, '01DS');INSERT INTO baz values
  (29999000052071),
  (29999000052072),
  (29999000052073),
  (29999000052074),
  (29999000052075),
  (29999000052076),
  (29999000052107),
  (29999000052111),
  (29999000052112),
  (29999000052113),
  (29999000052114),
  (29999000052116),
  (29999000052117),
  (29999000052118),
  (29999000052119),
  (29999000052120);{code}
 

Normal works(set hive.merge.nway.joins=false):
{code:java}
hive> set hive.merge.nway.joins=false;
hive> SELECT
  a.id,
  b.code,
  c.id
FROM bar AS a
INNER JOIN foo AS b
ON a.id = b.id
  AND (a.code = '01AS' OR b.code = '01BS')
LEFT OUTER JOIN baz AS c
ON a.id = c.id;

OK
29999000052116  01A     29999000052116
29999000052117  01A     29999000052117 {code}
Abnormal works(set hive.merge.nway.joins=true):
{code:java}
hive> set hive.merge.nway.joins=true;
hive> SELECT
  a.id,
  b.code,
  c.id
FROM bar AS a
INNER JOIN foo AS b
ON a.id = b.id
  AND (a.code = '01AS' OR b.code = '01BS')
LEFT OUTER JOIN baz AS c
ON a.id = c.id;

OK 29999000052071  01A     NULL
29999000052072  01A     NULL
29999000052073  01      NULL
29999000052116  01A     NULL
29999000052117  01A     NULL
29999000052071  01A     NULL
29999000052072  01A     NULL
29999000052073  01      NULL
29999000052116  01A     29999000052116
29999000052117  01A     29999000052117
29999000052071  01A     NULL
29999000052072  01A     NULL
29999000052073  01      NULL
29999000052116  01A     NULL
29999000052117  01A     NULL
29999000052071  01A     NULL
29999000052072  01A     NULL
29999000052073  01      NULL
29999000052116  01A     NULL
29999000052117  01A     NULL
29999000052071  01A     NULL
29999000052072  01A     NULL
29999000052073  01      NULL
29999000052116  01A     NULL
29999000052117  01A     NULL   {code}
 

I think this is also related to the next ticket: https://issues.apache.org/jira/browse/HIVE-21322


> Using MergeJoin and using filters does not work
> -----------------------------------------------
>
>                 Key: HIVE-27088
>                 URL: https://issues.apache.org/jira/browse/HIVE-27088
>             Project: Hive
>          Issue Type: Bug
>          Components: Query Planning
>            Reporter: Ryu Kobayashi
>            Assignee: Ryu Kobayashi
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 2h 50m
>  Remaining Estimate: 0h
>
> When hive.merge.nway.joins is set to `true` and JOIN is performed with INNER and OUTER and a filter exists, normal results cannot be obtained.
> For example:
> {code:java}
> -- Data preparation
> create temporary table foo (id bigint, code string) stored as orc;
> create temporary table bar (id bigint, code string) stored as orc;
> create temporary table baz (id bigint) stored as orc;
> INSERT INTO foo values
>   (29999000052073, '01'),
>   (29999000052107, '01'),
>   (29999000052111, '01'),
>   (29999000052112, '01'),
>   (29999000052113, '01'),
>   (29999000052114, '01'),
>   (29999000052071, '01A'),
>   (29999000052072, '01A'),
>   (29999000052116, '01A'),
>   (29999000052117, '01A'),
>   (29999000052118, '01A'),
>   (29999000052119, '01A'),
>   (29999000052120, '01A'),
>   (29999000052076, '06'),
>   (29999000052074, '06A'),
>   (29999000052075, '06A');INSERT INTO bar values
>   (29999000052071, '01'),
>   (29999000052072, '01'),
>   (29999000052073, '01'),
>   (29999000052116, '01'),
>   (29999000052117, '01'),
>   (29999000052071, '01A'),
>   (29999000052072, '01A'),
>   (29999000052073, '01A'),
>   (29999000052116, '01AS'),
>   (29999000052117, '01AS'),
>   (29999000052071, '01B'),
>   (29999000052072, '01B'),
>   (29999000052073, '01B'),
>   (29999000052116, '01BS'),
>   (29999000052117, '01BS'),
>   (29999000052071, '01C'),
>   (29999000052072, '01C'),
>   (29999000052073, '01C7'),
>   (29999000052116, '01CS'),
>   (29999000052117, '01CS'),
>   (29999000052071, '01D'),
>   (29999000052072, '01D'),
>   (29999000052073, '01D'),
>   (29999000052116, '01DS'),
>   (29999000052117, '01DS');INSERT INTO baz values
>   (29999000052071),
>   (29999000052072),
>   (29999000052073),
>   (29999000052074),
>   (29999000052075),
>   (29999000052076),
>   (29999000052107),
>   (29999000052111),
>   (29999000052112),
>   (29999000052113),
>   (29999000052114),
>   (29999000052116),
>   (29999000052117),
>   (29999000052118),
>   (29999000052119),
>   (29999000052120);{code}
> Normal works(set hive.merge.nway.joins=false):
> {code:java}
> hive> set hive.merge.nway.joins=false;
> hive> SELECT
>   a.id,
>   b.code,
>   c.id
> FROM bar AS a
> INNER JOIN foo AS b
> ON a.id = b.id
>   AND (a.code = '01AS' OR b.code = '01BS')
> LEFT OUTER JOIN baz AS c
> ON a.id = c.id;
> OK
> 29999000052116  01A     29999000052116
> 29999000052117  01A     29999000052117 {code}
> Abnormal works(set hive.merge.nway.joins=true):
> {code:java}
> hive> set hive.merge.nway.joins=true;
> hive> SELECT
>   a.id,
>   b.code,
>   c.id
> FROM bar AS a
> INNER JOIN foo AS b
> ON a.id = b.id
>   AND (a.code = '01AS' OR b.code = '01BS')
> LEFT OUTER JOIN baz AS c
> ON a.id = c.id;
> OK 29999000052071  01A     NULL
> 29999000052072  01A     NULL
> 29999000052073  01      NULL
> 29999000052116  01A     NULL
> 29999000052117  01A     NULL
> 29999000052071  01A     NULL
> 29999000052072  01A     NULL
> 29999000052073  01      NULL
> 29999000052116  01A     29999000052116
> 29999000052117  01A     29999000052117
> 29999000052071  01A     NULL
> 29999000052072  01A     NULL
> 29999000052073  01      NULL
> 29999000052116  01A     NULL
> 29999000052117  01A     NULL
> 29999000052071  01A     NULL
> 29999000052072  01A     NULL
> 29999000052073  01      NULL
> 29999000052116  01A     NULL
> 29999000052117  01A     NULL
> 29999000052071  01A     NULL
> 29999000052072  01A     NULL
> 29999000052073  01      NULL
> 29999000052116  01A     NULL
> 29999000052117  01A     NULL   {code}
>  
> I think this is also related to the next ticket: https://issues.apache.org/jira/browse/HIVE-21322



--
This message was sent by Atlassian Jira
(v8.20.10#820010)