You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/11/28 14:07:44 UTC

svn commit: r1642289 - in /hive/trunk/ql/src/test: queries/clientpositive/groupby_multi_single_reducer.q queries/clientpositive/smb_mapjoin_3.q results/clientpositive/groupby_multi_single_reducer.q.out results/clientpositive/smb_mapjoin_3.q.out

Author: xuefu
Date: Fri Nov 28 13:07:43 2014
New Revision: 1642289

URL: http://svn.apache.org/r1642289
Log:
HIVE-8989: Make groupby_multi_single_reducer.q and smb_mapjoin_3.q deterministic (Brock via Xuefu, reviewed by Szehon)

Modified:
    hive/trunk/ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q
    hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_3.q
    hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out
    hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_3.q.out

Modified: hive/trunk/ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q?rev=1642289&r1=1642288&r2=1642289&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q Fri Nov 28 13:07:43 2014
@@ -28,14 +28,14 @@ FROM src
 INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
 INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
 INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
-INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10
+INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1) as c1, count(DISTINCT substr(src.value,5)) as c2, concat(substr(src.key,1,1),sum(substr(src.value,5))) as c3, sum(substr(src.value, 5)) as c4, count(src.value) as c6 GROUP BY substr(src.key,1,1), substr(src.key,2,1) ORDER BY c1, c2  LIMIT 10
 INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1);
 
 FROM src
 INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
 INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
 INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
-INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10
+INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1) as c1, count(DISTINCT substr(src.value,5)) as c2, concat(substr(src.key,1,1),sum(substr(src.value,5))) as c3, sum(substr(src.value, 5)) as c4, count(src.value) as c6 GROUP BY substr(src.key,1,1), substr(src.key,2,1) ORDER BY c1, c2  LIMIT 10
 INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1);
 
 SELECT * FROM dest_g2;

Modified: hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_3.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_3.q?rev=1642289&r1=1642288&r2=1642289&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_3.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin_3.q Fri Nov 28 13:07:43 2014
@@ -1,4 +1,4 @@
-
+-- SORT_QUERY_RESULTS
 
 
 

Modified: hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out?rev=1642289&r1=1642288&r2=1642289&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out Fri Nov 28 13:07:43 2014
@@ -265,7 +265,7 @@ FROM src
 INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
 INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
 INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
-INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10
+INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1) as c1, count(DISTINCT substr(src.value,5)) as c2, concat(substr(src.key,1,1),sum(substr(src.value,5))) as c3, sum(substr(src.value, 5)) as c4, count(src.value) as c6 GROUP BY substr(src.key,1,1), substr(src.key,2,1) ORDER BY c1, c2  LIMIT 10
 INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1)
 PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN
@@ -273,7 +273,7 @@ FROM src
 INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
 INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
 INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
-INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10
+INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1) as c1, count(DISTINCT substr(src.value,5)) as c2, concat(substr(src.key,1,1),sum(substr(src.value,5))) as c3, sum(substr(src.value, 5)) as c4, count(src.value) as c6 GROUP BY substr(src.key,1,1), substr(src.key,2,1) ORDER BY c1, c2  LIMIT 10
 INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -444,15 +444,12 @@ STAGE PLANS:
               expressions: _col0 (type: string), _col2 (type: bigint), concat(_col0, _col3) (type: string), _col3 (type: double), _col4 (type: bigint)
               outputColumnNames: _col0, _col1, _col2, _col3, _col4
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-              Limit
-                Number of rows: 10
-                Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
           Filter Operator
             predicate: (KEY._col0 >= 5) (type: boolean)
             Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
@@ -480,14 +477,15 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             Reduce Output Operator
-              sort order: 
-              Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: double), _col4 (type: bigint)
+              key expressions: _col0 (type: string), _col1 (type: bigint)
+              sort order: ++
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col2 (type: string), _col3 (type: double), _col4 (type: bigint)
       Reduce Operator Tree:
         Select Operator
-          expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), VALUE._col2 (type: string), VALUE._col3 (type: double), VALUE._col4 (type: bigint)
+          expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: string), VALUE._col1 (type: double), VALUE._col2 (type: bigint)
           outputColumnNames: _col0, _col1, _col2, _col3, _col4
-          Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 10
             Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
@@ -534,7 +532,7 @@ PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
 INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
 INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
-INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10
+INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1) as c1, count(DISTINCT substr(src.value,5)) as c2, concat(substr(src.key,1,1),sum(substr(src.value,5))) as c3, sum(substr(src.value, 5)) as c4, count(src.value) as c6 GROUP BY substr(src.key,1,1), substr(src.key,2,1) ORDER BY c1, c2  LIMIT 10
 INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
@@ -547,7 +545,7 @@ POSTHOOK: query: FROM src
 INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
 INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
 INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
-INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10
+INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1) as c1, count(DISTINCT substr(src.value,5)) as c2, concat(substr(src.key,1,1),sum(substr(src.value,5))) as c3, sum(substr(src.value, 5)) as c4, count(src.value) as c6 GROUP BY substr(src.key,1,1), substr(src.key,2,1) ORDER BY c1, c2  LIMIT 10
 INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src

Modified: hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_3.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_3.q.out?rev=1642289&r1=1642288&r2=1642289&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_3.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_3.q.out Fri Nov 28 13:07:43 2014
@@ -1,8 +1,16 @@
-PREHOOK: query: create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+
+
+create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@smb_bucket_1
-POSTHOOK: query: create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+
+
+create table smb_bucket_1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS STORED AS RCFILE
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@smb_bucket_1
@@ -214,12 +222,12 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@smb_bucket_2
 POSTHOOK: Input: default@smb_bucket_3
 #### A masked pattern was here ####
-NULL	NULL	4	val_4
+20	val_20	20	val_20
+23	val_23	23	val_23
 NULL	NULL	10	val_10
 NULL	NULL	17	val_17
 NULL	NULL	19	val_19
-20	val_20	20	val_20
-23	val_23	23	val_23
+NULL	NULL	4	val_4
 PREHOOK: query: explain
 select /*+mapjoin(a)*/ * from smb_bucket_2 a full outer join smb_bucket_3 b on a.key = b.key
 PREHOOK: type: QUERY
@@ -273,14 +281,14 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@smb_bucket_2
 POSTHOOK: Input: default@smb_bucket_3
 #### A masked pattern was here ####
-NULL	NULL	4	val_4
-NULL	NULL	10	val_10
-NULL	NULL	17	val_17
-NULL	NULL	19	val_19
 20	val_20	20	val_20
 23	val_23	23	val_23
 25	val_25	NULL	NULL
 30	val_30	NULL	NULL
+NULL	NULL	10	val_10
+NULL	NULL	17	val_17
+NULL	NULL	19	val_19
+NULL	NULL	4	val_4
 PREHOOK: query: explain
 select /*+mapjoin(b)*/ * from smb_bucket_2 a join smb_bucket_3 b on a.key = b.key
 PREHOOK: type: QUERY
@@ -449,12 +457,12 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@smb_bucket_2
 POSTHOOK: Input: default@smb_bucket_3
 #### A masked pattern was here ####
-NULL	NULL	4	val_4
+20	val_20	20	val_20
+23	val_23	23	val_23
 NULL	NULL	10	val_10
 NULL	NULL	17	val_17
 NULL	NULL	19	val_19
-20	val_20	20	val_20
-23	val_23	23	val_23
+NULL	NULL	4	val_4
 PREHOOK: query: explain
 select /*+mapjoin(b)*/ * from smb_bucket_2 a full outer join smb_bucket_3 b on a.key = b.key
 PREHOOK: type: QUERY
@@ -508,11 +516,11 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@smb_bucket_2
 POSTHOOK: Input: default@smb_bucket_3
 #### A masked pattern was here ####
-NULL	NULL	4	val_4
-NULL	NULL	10	val_10
-NULL	NULL	17	val_17
-NULL	NULL	19	val_19
 20	val_20	20	val_20
 23	val_23	23	val_23
 25	val_25	NULL	NULL
 30	val_30	NULL	NULL
+NULL	NULL	10	val_10
+NULL	NULL	17	val_17
+NULL	NULL	19	val_19
+NULL	NULL	4	val_4