You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hive.apache.org by "Nemon Lou (JIRA)" <ji...@apache.org> on 2015/04/21 15:44:59 UTC
[jira] [Created] (HIVE-10417) Parallel Order By returns wrong
results for partitioned tables
Nemon Lou created HIVE-10417:
--------------------------------
Summary: Parallel Order By returns wrong results for partitioned tables
Key: HIVE-10417
URL: https://issues.apache.org/jira/browse/HIVE-10417
Project: Hive
Issue Type: Bug
Affects Versions: 1.0.0, 0.13.1, 0.14.0
Reporter: Nemon Lou
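The following script reproduces this bug. With parallel order by enabled (hive.optimize.sampling.orderby=true), the query against the unpartitioned table src returns the expected 10 rows, but the same query against the partitioned table src_orc_p returns only 3 rows, none of which are the highest keys.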
set hive.optimize.sampling.orderby=true;
set mapreduce.job.reduces=10;
select * from src order by key desc limit 10;
+--------------+----------------+
| src.key | src.value |
+--------------+----------------+
| 98 | val_98 |
| 98 | val_98 |
| 97 | val_97 |
| 97 | val_97 |
| 96 | val_96 |
| 95 | val_95 |
| 95 | val_95 |
| 92 | val_92 |
| 90 | val_90 |
| 90 | val_90 |
+--------------+----------------+
10 rows selected (47.916 seconds)
reset;
create table src_orc_p (key string ,value string )
partitioned by (kp string)
stored as orc
tblproperties("orc.compress"="SNAPPY");
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=10000;
set hive.exec.max.dynamic.partitions=10000;
insert into table src_orc_p partition(kp) select *,substring(key,1) from src distribute by substring(key,1);
set mapreduce.job.reduces=10;
set hive.optimize.sampling.orderby=true;
select * from src_orc_p order by key desc limit 10;
+----------------+------------------+-----------------+
| src_orc_p.key | src_orc_p.value | src_orc_p.kp |
+----------------+------------------+-----------------+
| 0 | val_0 | 0 |
| 0 | val_0 | 0 |
| 0 | val_0 | 0 |
+----------------+------------------+-----------------+
3 rows selected (39.861 seconds)
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)