You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/11/28 14:07:44 UTC
svn commit: r1642289 - in /hive/trunk/ql/src/test:
queries/clientpositive/groupby_multi_single_reducer.q
queries/clientpositive/smb_mapjoin_3.q
results/clientpositive/groupby_multi_single_reducer.q.out
results/clientpositive/smb_mapjoin_3.q.out
Author: xuefu
Date: Fri Nov 28 13:07:43 2014
New Revision: 1642289
URL: http://svn.apache.org/r1642289
Log:
HIVE-8989: Make groupby_multi_single_reducer.q and smb_mapjoin_3.q deterministic (Brock via Xuefu, reviewed by Szehon)
Modified:
hive/trunk/ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q
hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_3.q
hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out
hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_3.q.out
Modified: hive/trunk/ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q?rev=1642289&r1=1642288&r2=1642289&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q Fri Nov 28 13:07:43 2014
@@ -28,14 +28,14 @@ FROM src
INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
-INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10
+INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1) as c1, count(DISTINCT substr(src.value,5)) as c2, concat(substr(src.key,1,1),sum(substr(src.value,5))) as c3, sum(substr(src.value, 5)) as c4, count(src.value) as c6 GROUP BY substr(src.key,1,1), substr(src.key,2,1) ORDER BY c1, c2 LIMIT 10
INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1);
FROM src
INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
-INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10
+INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1) as c1, count(DISTINCT substr(src.value,5)) as c2, concat(substr(src.key,1,1),sum(substr(src.value,5))) as c3, sum(substr(src.value, 5)) as c4, count(src.value) as c6 GROUP BY substr(src.key,1,1), substr(src.key,2,1) ORDER BY c1, c2 LIMIT 10
INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1);
SELECT * FROM dest_g2;
Modified: hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_3.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_3.q?rev=1642289&r1=1642288&r2=1642289&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_3.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_3.q Fri Nov 28 13:07:43 2014
@@ -1,4 +1,4 @@
-
+-- SORT_QUERY_RESULTS
Modified: hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out?rev=1642289&r1=1642288&r2=1642289&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out Fri Nov 28 13:07:43 2014
@@ -265,7 +265,7 @@ FROM src
INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
-INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10
+INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1) as c1, count(DISTINCT substr(src.value,5)) as c2, concat(substr(src.key,1,1),sum(substr(src.value,5))) as c3, sum(substr(src.value, 5)) as c4, count(src.value) as c6 GROUP BY substr(src.key,1,1), substr(src.key,2,1) ORDER BY c1, c2 LIMIT 10
INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1)
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN
@@ -273,7 +273,7 @@ FROM src
INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
-INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10
+INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1) as c1, count(DISTINCT substr(src.value,5)) as c2, concat(substr(src.key,1,1),sum(substr(src.value,5))) as c3, sum(substr(src.value, 5)) as c4, count(src.value) as c6 GROUP BY substr(src.key,1,1), substr(src.key,2,1) ORDER BY c1, c2 LIMIT 10
INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
@@ -444,15 +444,12 @@ STAGE PLANS:
expressions: _col0 (type: string), _col2 (type: bigint), concat(_col0, _col3) (type: string), _col3 (type: double), _col4 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 10
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Filter Operator
predicate: (KEY._col0 >= 5) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
@@ -480,14 +477,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
Reduce Output Operator
- sort order:
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: double), _col4 (type: bigint)
+ key expressions: _col0 (type: string), _col1 (type: bigint)
+ sort order: ++
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: string), _col3 (type: double), _col4 (type: bigint)
Reduce Operator Tree:
Select Operator
- expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), VALUE._col2 (type: string), VALUE._col3 (type: double), VALUE._col4 (type: bigint)
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: string), VALUE._col1 (type: double), VALUE._col2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
@@ -534,7 +532,7 @@ PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
-INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10
+INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1) as c1, count(DISTINCT substr(src.value,5)) as c2, concat(substr(src.key,1,1),sum(substr(src.value,5))) as c3, sum(substr(src.value, 5)) as c4, count(src.value) as c6 GROUP BY substr(src.key,1,1), substr(src.key,2,1) ORDER BY c1, c2 LIMIT 10
INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1)
PREHOOK: type: QUERY
PREHOOK: Input: default@src
@@ -547,7 +545,7 @@ POSTHOOK: query: FROM src
INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
-INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10
+INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1) as c1, count(DISTINCT substr(src.value,5)) as c2, concat(substr(src.key,1,1),sum(substr(src.value,5))) as c3, sum(substr(src.value, 5)) as c4, count(src.value) as c6 GROUP BY substr(src.key,1,1), substr(src.key,2,1) ORDER BY c1, c2 LIMIT 10
INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
Modified: hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_3.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_3.q.out?rev=1642289&r1=1642288&r2=1642289&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_3.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_3.q.out Fri Nov 28 13:07:43 2014
@@ -1,8 +1,16 @@
-PREHOOK: query: create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+
+
+create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@smb_bucket_1
-POSTHOOK: query: create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+
+
+create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@smb_bucket_1
@@ -214,12 +222,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@smb_bucket_2
POSTHOOK: Input: default@smb_bucket_3
#### A masked pattern was here ####
-NULL NULL 4 val_4
+20 val_20 20 val_20
+23 val_23 23 val_23
NULL NULL 10 val_10
NULL NULL 17 val_17
NULL NULL 19 val_19
-20 val_20 20 val_20
-23 val_23 23 val_23
+NULL NULL 4 val_4
PREHOOK: query: explain
select /*+mapjoin(a)*/ * from smb_bucket_2 a full outer join smb_bucket_3 b on a.key = b.key
PREHOOK: type: QUERY
@@ -273,14 +281,14 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@smb_bucket_2
POSTHOOK: Input: default@smb_bucket_3
#### A masked pattern was here ####
-NULL NULL 4 val_4
-NULL NULL 10 val_10
-NULL NULL 17 val_17
-NULL NULL 19 val_19
20 val_20 20 val_20
23 val_23 23 val_23
25 val_25 NULL NULL
30 val_30 NULL NULL
+NULL NULL 10 val_10
+NULL NULL 17 val_17
+NULL NULL 19 val_19
+NULL NULL 4 val_4
PREHOOK: query: explain
select /*+mapjoin(b)*/ * from smb_bucket_2 a join smb_bucket_3 b on a.key = b.key
PREHOOK: type: QUERY
@@ -449,12 +457,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@smb_bucket_2
POSTHOOK: Input: default@smb_bucket_3
#### A masked pattern was here ####
-NULL NULL 4 val_4
+20 val_20 20 val_20
+23 val_23 23 val_23
NULL NULL 10 val_10
NULL NULL 17 val_17
NULL NULL 19 val_19
-20 val_20 20 val_20
-23 val_23 23 val_23
+NULL NULL 4 val_4
PREHOOK: query: explain
select /*+mapjoin(b)*/ * from smb_bucket_2 a full outer join smb_bucket_3 b on a.key = b.key
PREHOOK: type: QUERY
@@ -508,11 +516,11 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@smb_bucket_2
POSTHOOK: Input: default@smb_bucket_3
#### A masked pattern was here ####
-NULL NULL 4 val_4
-NULL NULL 10 val_10
-NULL NULL 17 val_17
-NULL NULL 19 val_19
20 val_20 20 val_20
23 val_23 23 val_23
25 val_25 NULL NULL
30 val_30 NULL NULL
+NULL NULL 10 val_10
+NULL NULL 17 val_17
+NULL NULL 19 val_19
+NULL NULL 4 val_4