You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hive.apache.org by "Thejas M Nair (JIRA)" <ji...@apache.org> on 2017/06/22 00:48:00 UTC
[jira] [Updated] (HIVE-16932) incorrect predicate evaluation
[ https://issues.apache.org/jira/browse/HIVE-16932?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Thejas M Nair updated HIVE-16932:
---------------------------------
Description:
Hive returns an incorrect number of rows when the BETWEEN and NOT BETWEEN operators are used in a WHERE clause while querying a table that uses ORC as its storage format.
Script to replicate the issue on HDP 2.6:
{code}
-- Reproduction script for HIVE-16932.
-- Builds one integer column, stores it once as a text table and once as an
-- ORC table, then runs the same BETWEEN / NOT BETWEEN count against each so
-- the two results can be compared.

-- Session settings used for the repro: output compression and vectorized
-- execution are turned off; hive.optimize.ppd and hive.optimize.ppd.storage
-- are turned on.
SET hive.exec.compress.output=false;
SET hive.vectorized.execution.enabled=false;
SET hive.optimize.ppd=true;
SET hive.optimize.ppd.storage=true;
-- N sizes the generated data and also supplies the predicate bounds below.
SET N=100000;
-- TTT / TTO hold the names of the text-format and ORC-format tables; they are
-- substituted into the statements below via ${hiveconf:...}.
SET TTT=default.tmp_tbl_text;
SET TTO=default.tmp_tbl_orc;
-- Drop leftovers from a previous run so the CREATE TABLE statements succeed.
DROP TABLE IF EXISTS ${hiveconf:TTT};
DROP TABLE IF EXISTS ${hiveconf:TTO};
-- Text table: repeat() builds a string of N commas, split() breaks it on ',',
-- and posexplode() emits one row per element; the element position (pos) is
-- stored as column c.
-- NOTE(review): presumably c therefore runs from 0 to N inclusive - confirm
-- against split()'s handling of trailing empty strings.
create table ${hiveconf:TTT}
stored as textfile
as
select pos as c
from (
select posexplode(split(repeat(',', ${hiveconf:N}), ','))
) as t;
-- ORC table: a straight copy of column c from the text table, so both tables
-- hold the same values and differ only in storage format.
create table ${hiveconf:TTO}
stored as orc
as
select c
from ${hiveconf:TTT};
-- Count against the text table: c within [0, N] and outside [N div 4, N div 2].
SELECT count(c) as cnt
FROM ${hiveconf:TTT}
WHERE
c between 0 and ${hiveconf:N}
and c not between ${hiveconf:N} div 4 and ${hiveconf:N} div 2
;
-- Identical predicate against the ORC table. The data is the same, so cnt
-- should match the text-table result; the issue reports that it does not.
SELECT count(c) as cnt
FROM ${hiveconf:TTO}
WHERE
c between 0 and ${hiveconf:N}
and c not between ${hiveconf:N} div 4 and ${hiveconf:N} div 2
;
-- Clean up the temporary tables.
DROP TABLE IF EXISTS ${hiveconf:TTT};
DROP TABLE IF EXISTS ${hiveconf:TTO};
{code}
was:
Hive returns an incorrect number of rows when the BETWEEN and NOT BETWEEN operators are used in a WHERE clause while querying a table that uses ORC as its storage format.
Script to replicate the issue on HDP 2.6:
-- Reproduction script for HIVE-16932 (text of the previous description).
-- Builds one integer column, stores it once as a text table and once as an
-- ORC table, then runs the same BETWEEN / NOT BETWEEN count against each so
-- the two results can be compared.

-- Session settings used for the repro: output compression and vectorized
-- execution are turned off; hive.optimize.ppd and hive.optimize.ppd.storage
-- are turned on.
SET hive.exec.compress.output=false;
SET hive.vectorized.execution.enabled=false;
SET hive.optimize.ppd=true;
SET hive.optimize.ppd.storage=true;
-- N sizes the generated data and also supplies the predicate bounds below.
SET N=100000;
-- TTT / TTO hold the names of the text-format and ORC-format tables; they are
-- substituted into the statements below via ${hiveconf:...}.
SET TTT=default.tmp_tbl_text;
SET TTO=default.tmp_tbl_orc;
-- Drop leftovers from a previous run so the CREATE TABLE statements succeed.
DROP TABLE IF EXISTS ${hiveconf:TTT};
DROP TABLE IF EXISTS ${hiveconf:TTO};
-- Text table: repeat() builds a string of N commas, split() breaks it on ',',
-- and posexplode() emits one row per element; the element position (pos) is
-- stored as column c.
-- NOTE(review): presumably c therefore runs from 0 to N inclusive - confirm
-- against split()'s handling of trailing empty strings.
create table ${hiveconf:TTT}
stored as textfile
as
select pos as c
from (
select posexplode(split(repeat(',', ${hiveconf:N}), ','))
) as t;
-- ORC table: a straight copy of column c from the text table, so both tables
-- hold the same values and differ only in storage format.
create table ${hiveconf:TTO}
stored as orc
as
select c
from ${hiveconf:TTT};
-- Count against the text table: c within [0, N] and outside [N div 4, N div 2].
SELECT count(c) as cnt
FROM ${hiveconf:TTT}
WHERE
c between 0 and ${hiveconf:N}
and c not between ${hiveconf:N} div 4 and ${hiveconf:N} div 2
;
-- Identical predicate against the ORC table. The data is the same, so cnt
-- should match the text-table result; the issue reports that it does not.
SELECT count(c) as cnt
FROM ${hiveconf:TTO}
WHERE
c between 0 and ${hiveconf:N}
and c not between ${hiveconf:N} div 4 and ${hiveconf:N} div 2
;
-- Clean up the temporary tables.
DROP TABLE IF EXISTS ${hiveconf:TTT};
DROP TABLE IF EXISTS ${hiveconf:TTO};
> incorrect predicate evaluation
> ------------------------------
>
> Key: HIVE-16932
> URL: https://issues.apache.org/jira/browse/HIVE-16932
> Project: Hive
> Issue Type: Bug
> Components: CLI, Hive, ORC
> Affects Versions: 1.2.1
> Environment: CentOS, HDP 2.6
> Reporter: Jim Hopper
>
> Hive returns an incorrect number of rows when the BETWEEN and NOT BETWEEN operators are used in a WHERE clause while querying a table that uses ORC as its storage format.
> Script to replicate the issue on HDP 2.6:
> {code}
> SET hive.exec.compress.output=false;
> SET hive.vectorized.execution.enabled=false;
> SET hive.optimize.ppd=true;
> SET hive.optimize.ppd.storage=true;
> SET N=100000;
> SET TTT=default.tmp_tbl_text;
> SET TTO=default.tmp_tbl_orc;
> DROP TABLE IF EXISTS ${hiveconf:TTT};
> DROP TABLE IF EXISTS ${hiveconf:TTO};
> create table ${hiveconf:TTT}
> stored as textfile
> as
> select pos as c
> from (
> select posexplode(split(repeat(',', ${hiveconf:N}), ','))
> ) as t;
> create table ${hiveconf:TTO}
> stored as orc
> as
> select c
> from ${hiveconf:TTT};
> SELECT count(c) as cnt
> FROM ${hiveconf:TTT}
> WHERE
> c between 0 and ${hiveconf:N}
> and c not between ${hiveconf:N} div 4 and ${hiveconf:N} div 2
> ;
> SELECT count(c) as cnt
> FROM ${hiveconf:TTO}
> WHERE
> c between 0 and ${hiveconf:N}
> and c not between ${hiveconf:N} div 4 and ${hiveconf:N} div 2
> ;
> DROP TABLE IF EXISTS ${hiveconf:TTT};
> DROP TABLE IF EXISTS ${hiveconf:TTO};
> {code}
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)