You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hive.apache.org by "Zoltan Haindrich (Jira)" <ji...@apache.org> on 2021/03/31 12:01:00 UTC
[jira] [Updated] (HIVE-24963) Windowing expression may loose its
input in some cases
[ https://issues.apache.org/jira/browse/HIVE-24963?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Zoltan Haindrich updated HIVE-24963:
------------------------------------
Description:
{code}
drop table if exists sss;
CREATE TABLE `sss`(
`user_id` bigint,
`user_mid` string
)
PARTITIONED BY (
`dt` string)
STORED AS ORC
;
insert into sss partition(dt='part1') VALUES (12345,'user_mid v1'),(12345,'user_mid v1'),(12345,'user_mid v1'),(12345,'user_mid v1'),(12345,'user_mid v1');
set hive.auto.convert.join.noconditionaltask.size=1;
WITH
unioned_user AS (
SELECT
*,
row_number() OVER (PARTITION BY user_mid ORDER BY dt ASC) AS r_asc,
row_number() OVER (PARTITION BY user_mid ORDER BY dt DESC) AS r_desc
FROM (
SELECT DISTINCT
dt,
user_mid
FROM sss
WHERE dt = '20210228'
UNION ALL
SELECT DISTINCT
dt,
user_mid
FROM sss
) AS uni
),
merged_user AS (
SELECT
a.user_mid
FROM (SELECT * FROM unioned_user WHERE r_asc = 1) AS a
INNER JOIN (SELECT * FROM unioned_user WHERE r_desc = 1) AS d
ON a.user_mid = d.user_mid
)
Select count(*) from merged_user;
{code}
was:
{code}
drop table if exists sss;
CREATE TABLE `sss`(
`user_id` bigint,
`user_mid` string
)
PARTITIONED BY (
`dt` string)
STORED AS ORC
;
insert into sss partition(dt='part1') VALUES (12345,'user_mid v1'),(12345,'user_mid v1'),(12345,'user_mid v1'),(12345,'user_mid v1'),(12345,'user_mid v1');
set hive.auto.convert.join.noconditionaltask.size=1;
WITH
unioned_user AS (
SELECT
*,
row_number() OVER (PARTITION BY user_mid ORDER BY dt ASC) AS r_asc,
row_number() OVER (PARTITION BY user_mid ORDER BY dt DESC) AS r_desc
FROM (
SELECT DISTINCT
dt,
user_mid
FROM sss
WHERE dt = '20210228'
UNION ALL
SELECT DISTINCT
dt,
user_mid
FROM sss
) AS uni
),
merged_user AS (
SELECT
a.user_mid
FROM (SELECT * FROM unioned_user WHERE r_asc = 1) AS a
INNER JOIN (SELECT * FROM unioned_user WHERE r_desc = 1) AS d
ON a.user_mid = d.user_mid
)
Select count(*) from merged_user;
{cdode}
> Windowing expression may loose its input in some cases
> ------------------------------------------------------
>
> Key: HIVE-24963
> URL: https://issues.apache.org/jira/browse/HIVE-24963
> Project: Hive
> Issue Type: Bug
> Reporter: Zoltan Haindrich
> Assignee: Zoltan Haindrich
> Priority: Major
>
> {code}
> drop table if exists sss;
> CREATE TABLE `sss`(
> `user_id` bigint,
> `user_mid` string
> )
> PARTITIONED BY (
> `dt` string)
> STORED AS ORC
> ;
> insert into sss partition(dt='part1') VALUES (12345,'user_mid v1'),(12345,'user_mid v1'),(12345,'user_mid v1'),(12345,'user_mid v1'),(12345,'user_mid v1');
> set hive.auto.convert.join.noconditionaltask.size=1;
> WITH
> unioned_user AS (
> SELECT
> *,
> row_number() OVER (PARTITION BY user_mid ORDER BY dt ASC) AS r_asc,
> row_number() OVER (PARTITION BY user_mid ORDER BY dt DESC) AS r_desc
> FROM (
> SELECT DISTINCT
> dt,
> user_mid
> FROM sss
> WHERE dt = '20210228'
> UNION ALL
> SELECT DISTINCT
> dt,
> user_mid
> FROM sss
> ) AS uni
> ),
> merged_user AS (
> SELECT
> a.user_mid
> FROM (SELECT * FROM unioned_user WHERE r_asc = 1) AS a
> INNER JOIN (SELECT * FROM unioned_user WHERE r_desc = 1) AS d
> ON a.user_mid = d.user_mid
> )
> Select count(*) from merged_user;
> {code}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)