You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hive.apache.org by "Thejas M Nair (JIRA)" <ji...@apache.org> on 2017/06/22 00:48:00 UTC

[jira] [Updated] (HIVE-16932) incorrect predicate evaluation

     [ https://issues.apache.org/jira/browse/HIVE-16932?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Thejas M Nair updated HIVE-16932:
---------------------------------
    Description: 
hive returns incorrect number of rows when BETWEEN and NOT BETWEEN operators are used in WHERE clause while querying a table that uses ORC as a storage format.

script to replicate the issue on HDP 2.6:

{code}

SET hive.exec.compress.output=false;
SET hive.vectorized.execution.enabled=false;
SET hive.optimize.ppd=true;
SET hive.optimize.ppd.storage=true;
SET N=100000;

SET TTT=default.tmp_tbl_text;
SET TTO=default.tmp_tbl_orc;

DROP TABLE IF EXISTS ${hiveconf:TTT};
DROP TABLE IF EXISTS ${hiveconf:TTO};

create table ${hiveconf:TTT}
stored as textfile
as
select pos as c
from (
    select posexplode(split(repeat(',', ${hiveconf:N}), ','))
) as t;


create table ${hiveconf:TTO}
stored as orc
as
select c
from ${hiveconf:TTT};


SELECT count(c) as cnt
FROM ${hiveconf:TTT}
WHERE
    c between 0 and ${hiveconf:N}
    and c not between ${hiveconf:N} div 4 and ${hiveconf:N} div 2
;


SELECT count(c) as cnt
FROM ${hiveconf:TTO}
WHERE
    c between 0 and ${hiveconf:N}
    and c not between ${hiveconf:N} div 4 and ${hiveconf:N} div 2
;

DROP TABLE IF EXISTS ${hiveconf:TTT};
DROP TABLE IF EXISTS ${hiveconf:TTO};

{code}

  was:
hive returns incorrect number of rows when BETWEEN and NOT BETWEEN operators are used in WHERE clause while querying a table that uses ORC as a storage format.

script to replicate the issue on HDP 2.6:



SET hive.exec.compress.output=false;
SET hive.vectorized.execution.enabled=false;
SET hive.optimize.ppd=true;
SET hive.optimize.ppd.storage=true;
SET N=100000;

SET TTT=default.tmp_tbl_text;
SET TTO=default.tmp_tbl_orc;

DROP TABLE IF EXISTS ${hiveconf:TTT};
DROP TABLE IF EXISTS ${hiveconf:TTO};

create table ${hiveconf:TTT}
stored as textfile
as
select pos as c
from (
    select posexplode(split(repeat(',', ${hiveconf:N}), ','))
) as t;


create table ${hiveconf:TTO}
stored as orc
as
select c
from ${hiveconf:TTT};


SELECT count(c) as cnt
FROM ${hiveconf:TTT}
WHERE
    c between 0 and ${hiveconf:N}
    and c not between ${hiveconf:N} div 4 and ${hiveconf:N} div 2
;


SELECT count(c) as cnt
FROM ${hiveconf:TTO}
WHERE
    c between 0 and ${hiveconf:N}
    and c not between ${hiveconf:N} div 4 and ${hiveconf:N} div 2
;

DROP TABLE IF EXISTS ${hiveconf:TTT};
DROP TABLE IF EXISTS ${hiveconf:TTO};



> incorrect predicate evaluation
> ------------------------------
>
>                 Key: HIVE-16932
>                 URL: https://issues.apache.org/jira/browse/HIVE-16932
>             Project: Hive
>          Issue Type: Bug
>          Components: CLI, Hive, ORC
>    Affects Versions: 1.2.1
>         Environment: CentOS, HDP 2.6
>            Reporter: Jim Hopper
>
> hive returns incorrect number of rows when BETWEEN and NOT BETWEEN operators are used in WHERE clause while querying a table that uses ORC as a storage format.
> script to replicate the issue on HDP 2.6:
> {code}
> SET hive.exec.compress.output=false;
> SET hive.vectorized.execution.enabled=false;
> SET hive.optimize.ppd=true;
> SET hive.optimize.ppd.storage=true;
> SET N=100000;
> SET TTT=default.tmp_tbl_text;
> SET TTO=default.tmp_tbl_orc;
> DROP TABLE IF EXISTS ${hiveconf:TTT};
> DROP TABLE IF EXISTS ${hiveconf:TTO};
> create table ${hiveconf:TTT}
> stored as textfile
> as
> select pos as c
> from (
>     select posexplode(split(repeat(',', ${hiveconf:N}), ','))
> ) as t;
> create table ${hiveconf:TTO}
> stored as orc
> as
> select c
> from ${hiveconf:TTT};
> SELECT count(c) as cnt
> FROM ${hiveconf:TTT}
> WHERE
>     c between 0 and ${hiveconf:N}
>     and c not between ${hiveconf:N} div 4 and ${hiveconf:N} div 2
> ;
> SELECT count(c) as cnt
> FROM ${hiveconf:TTO}
> WHERE
>     c between 0 and ${hiveconf:N}
>     and c not between ${hiveconf:N} div 4 and ${hiveconf:N} div 2
> ;
> DROP TABLE IF EXISTS ${hiveconf:TTT};
> DROP TABLE IF EXISTS ${hiveconf:TTO};
> {code}



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)