You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2015/08/07 15:19:10 UTC

hive git commit: HIVE-10880: The bucket number is not respected in insert overwrite (Yongzhi via Xuefu)

Repository: hive
Updated Branches:
  refs/heads/master 7e4f3bbff -> 09e330d27


HIVE-10880: The bucket number is not respected in insert overwrite (Yongzhi via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/09e330d2
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/09e330d2
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/09e330d2

Branch: refs/heads/master
Commit: 09e330d27883ef08e6f66faabd205f527ed6f194
Parents: 7e4f3bb
Author: Xuefu Zhang <xz...@Cloudera.com>
Authored: Fri Aug 7 06:18:53 2015 -0700
Committer: Xuefu Zhang <xz...@Cloudera.com>
Committed: Fri Aug 7 06:18:53 2015 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/exec/Utilities.java   |  26 ++++-
 .../clientpositive/insertoverwrite_bucket.q     |  28 +++++
 .../clientpositive/groupby_sort_1_23.q.out      |  56 +++++-----
 .../clientpositive/groupby_sort_skew_1_23.q.out |  56 +++++-----
 .../clientpositive/insertoverwrite_bucket.q.out | 104 +++++++++++++++++++
 .../spark/groupby_sort_1_23.q.out               |  90 ++++++++--------
 .../spark/groupby_sort_skew_1_23.q.out          |  90 ++++++++--------
 7 files changed, 299 insertions(+), 151 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/09e330d2/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index b505ff8..ca86301 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -1895,7 +1895,7 @@ public final class Utilities {
       if (fs.exists(tmpPath)) {
         // remove any tmp file or double-committed output files
         ArrayList<String> emptyBuckets =
-            Utilities.removeTempOrDuplicateFiles(fs, tmpPath, dpCtx);
+            Utilities.removeTempOrDuplicateFiles(fs, tmpPath, dpCtx, conf, hconf);
         // create empty buckets if necessary
         if (emptyBuckets.size() > 0) {
           createEmptyBuckets(hconf, emptyBuckets, conf, reporter);
@@ -1964,7 +1964,7 @@ public final class Utilities {
    * Remove all temporary files and duplicate (double-committed) files from a given directory.
    */
   public static void removeTempOrDuplicateFiles(FileSystem fs, Path path) throws IOException {
-    removeTempOrDuplicateFiles(fs, path, null);
+    removeTempOrDuplicateFiles(fs, path, null,null,null);
   }
 
   /**
@@ -1973,15 +1973,15 @@ public final class Utilities {
    * @return a list of path names corresponding to should-be-created empty buckets.
    */
   public static ArrayList<String> removeTempOrDuplicateFiles(FileSystem fs, Path path,
-      DynamicPartitionCtx dpCtx) throws IOException {
+      DynamicPartitionCtx dpCtx, FileSinkDesc conf, Configuration hconf) throws IOException {
     if (path == null) {
       return null;
     }
 
     ArrayList<String> result = new ArrayList<String>();
+    HashMap<String, FileStatus> taskIDToFile = null;
     if (dpCtx != null) {
       FileStatus parts[] = HiveStatsUtils.getFileStatusRecurse(path, dpCtx.getNumDPCols(), fs);
-      HashMap<String, FileStatus> taskIDToFile = null;
 
       for (int i = 0; i < parts.length; ++i) {
         assert parts[i].isDir() : "dynamic partition " + parts[i].getPath()
@@ -2017,8 +2017,24 @@ public final class Utilities {
       }
     } else {
       FileStatus[] items = fs.listStatus(path);
-      removeTempOrDuplicateFiles(items, fs);
+      taskIDToFile = removeTempOrDuplicateFiles(items, fs);
+      if(taskIDToFile != null && taskIDToFile.size() > 0 && conf != null && conf.getTable() != null
+          && (conf.getTable().getNumBuckets() > taskIDToFile.size())
+          && (HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVEENFORCEBUCKETING))) {
+          // get the missing buckets and generate empty buckets for non-dynamic partition
+        String taskID1 = taskIDToFile.keySet().iterator().next();
+        Path bucketPath = taskIDToFile.values().iterator().next().getPath();
+        for (int j = 0; j < conf.getTable().getNumBuckets(); ++j) {
+          String taskID2 = replaceTaskId(taskID1, j);
+          if (!taskIDToFile.containsKey(taskID2)) {
+            // create empty bucket, file name should be derived from taskID2
+            String path2 = replaceTaskIdFromFilename(bucketPath.toUri().getPath().toString(), j);
+            result.add(path2);
+          }
+        }
+      }
     }
+
     return result;
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/09e330d2/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q b/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q
new file mode 100644
index 0000000..d939710
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q
@@ -0,0 +1,28 @@
+CREATE TABLE IF NOT EXISTS bucketinput( 
+data string 
+) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
+CREATE TABLE IF NOT EXISTS bucketoutput1( 
+data string 
+)CLUSTERED BY(data) 
+INTO 2 BUCKETS 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
+CREATE TABLE IF NOT EXISTS bucketoutput2( 
+data string 
+)CLUSTERED BY(data) 
+INTO 2 BUCKETS 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
+insert into table bucketinput values ("firstinsert1");
+insert into table bucketinput values ("firstinsert2");
+insert into table bucketinput values ("firstinsert3");
+set hive.enforce.bucketing = true; 
+set hive.enforce.sorting=true;
+insert overwrite table bucketoutput1 select * from bucketinput where data like 'first%'; 
+set hive.auto.convert.sortmerge.join=true; 
+set hive.optimize.bucketmapjoin = true; 
+set hive.optimize.bucketmapjoin.sortedmerge = true; 
+select * from bucketoutput1 a join bucketoutput2 b on (a.data=b.data);
+drop table buckettestinput;
+drop table buckettestoutput1;
+drop table buckettestoutput2;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/09e330d2/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out
index ffbfcd1..8ba10c5 100644
--- a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out
+++ b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out
@@ -155,7 +155,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -177,7 +177,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -492,7 +492,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -514,7 +514,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -750,7 +750,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -772,7 +772,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -1158,7 +1158,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -1180,7 +1180,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -1557,7 +1557,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -1579,7 +1579,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -1898,7 +1898,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -1920,7 +1920,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -2119,7 +2119,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -2141,7 +2141,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -2386,7 +2386,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -2408,7 +2408,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -2734,7 +2734,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -2756,7 +2756,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -3159,7 +3159,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -3181,7 +3181,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -3355,7 +3355,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -3377,7 +3377,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -3816,7 +3816,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -3838,7 +3838,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -4098,7 +4098,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -4120,7 +4120,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -4232,7 +4232,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -4254,7 +4254,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}

http://git-wip-us.apache.org/repos/asf/hive/blob/09e330d2/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
index 65fcb8e..2a956fc 100644
--- a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
+++ b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
@@ -155,7 +155,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -177,7 +177,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -493,7 +493,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -515,7 +515,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -815,7 +815,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -837,7 +837,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -1223,7 +1223,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -1245,7 +1245,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -1622,7 +1622,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -1644,7 +1644,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -1964,7 +1964,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -1986,7 +1986,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -2250,7 +2250,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -2272,7 +2272,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -2582,7 +2582,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -2604,7 +2604,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -2994,7 +2994,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -3016,7 +3016,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -3420,7 +3420,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -3442,7 +3442,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -3680,7 +3680,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -3702,7 +3702,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -4141,7 +4141,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -4163,7 +4163,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -4424,7 +4424,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -4446,7 +4446,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}
@@ -4622,7 +4622,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.t1
-              numFiles 1
+              numFiles 2
               numRows 6
               rawDataSize 24
               serialization.ddl struct t1 { string key, string val}
@@ -4644,7 +4644,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.t1
-                numFiles 1
+                numFiles 2
                 numRows 6
                 rawDataSize 24
                 serialization.ddl struct t1 { string key, string val}

http://git-wip-us.apache.org/repos/asf/hive/blob/09e330d2/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out b/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out
new file mode 100644
index 0000000..9b7b85d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out
@@ -0,0 +1,104 @@
+PREHOOK: query: CREATE TABLE IF NOT EXISTS bucketinput( 
+data string 
+) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@bucketinput
+POSTHOOK: query: CREATE TABLE IF NOT EXISTS bucketinput( 
+data string 
+) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@bucketinput
+PREHOOK: query: CREATE TABLE IF NOT EXISTS bucketoutput1( 
+data string 
+)CLUSTERED BY(data) 
+INTO 2 BUCKETS 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@bucketoutput1
+POSTHOOK: query: CREATE TABLE IF NOT EXISTS bucketoutput1( 
+data string 
+)CLUSTERED BY(data) 
+INTO 2 BUCKETS 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@bucketoutput1
+PREHOOK: query: CREATE TABLE IF NOT EXISTS bucketoutput2( 
+data string 
+)CLUSTERED BY(data) 
+INTO 2 BUCKETS 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@bucketoutput2
+POSTHOOK: query: CREATE TABLE IF NOT EXISTS bucketoutput2( 
+data string 
+)CLUSTERED BY(data) 
+INTO 2 BUCKETS 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@bucketoutput2
+PREHOOK: query: insert into table bucketinput values ("firstinsert1")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@bucketinput
+POSTHOOK: query: insert into table bucketinput values ("firstinsert1")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@bucketinput
+POSTHOOK: Lineage: bucketinput.data SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into table bucketinput values ("firstinsert2")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@bucketinput
+POSTHOOK: query: insert into table bucketinput values ("firstinsert2")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@bucketinput
+POSTHOOK: Lineage: bucketinput.data SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert into table bucketinput values ("firstinsert3")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__3
+PREHOOK: Output: default@bucketinput
+POSTHOOK: query: insert into table bucketinput values ("firstinsert3")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__3
+POSTHOOK: Output: default@bucketinput
+POSTHOOK: Lineage: bucketinput.data SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: insert overwrite table bucketoutput1 select * from bucketinput where data like 'first%'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucketinput
+PREHOOK: Output: default@bucketoutput1
+POSTHOOK: query: insert overwrite table bucketoutput1 select * from bucketinput where data like 'first%'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucketinput
+POSTHOOK: Output: default@bucketoutput1
+POSTHOOK: Lineage: bucketoutput1.data SIMPLE [(bucketinput)bucketinput.FieldSchema(name:data, type:string, comment:null), ]
+PREHOOK: query: select * from bucketoutput1 a join bucketoutput2 b on (a.data=b.data)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucketoutput1
+PREHOOK: Input: default@bucketoutput2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from bucketoutput1 a join bucketoutput2 b on (a.data=b.data)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucketoutput1
+POSTHOOK: Input: default@bucketoutput2
+#### A masked pattern was here ####
+PREHOOK: query: drop table buckettestinput
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table buckettestinput
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table buckettestoutput1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table buckettestoutput1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table buckettestoutput2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table buckettestoutput2
+POSTHOOK: type: DROPTABLE

http://git-wip-us.apache.org/repos/asf/hive/blob/09e330d2/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
index 56aedd6..c67190a 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
@@ -153,7 +153,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -175,7 +175,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -342,7 +342,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -364,7 +364,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -568,7 +568,7 @@ STAGE PLANS:
                                 columns.types int:int
 #### A masked pattern was here ####
                                 name default.outputtbl1
-                                numFiles 1
+                                numFiles 2
                                 numRows 5
                                 rawDataSize 15
                                 serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
@@ -599,7 +599,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -621,7 +621,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -651,7 +651,7 @@ STAGE PLANS:
                 columns.types int:int
 #### A masked pattern was here ####
                 name default.outputtbl1
-                numFiles 1
+                numFiles 2
                 numRows 5
                 rawDataSize 15
                 serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
@@ -791,7 +791,7 @@ STAGE PLANS:
                                 columns.types int:int
 #### A masked pattern was here ####
                                 name default.outputtbl1
-                                numFiles 1
+                                numFiles 2
                                 numRows 5
                                 rawDataSize 15
                                 serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
@@ -822,7 +822,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -844,7 +844,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -874,7 +874,7 @@ STAGE PLANS:
                 columns.types int:int
 #### A masked pattern was here ####
                 name default.outputtbl1
-                numFiles 1
+                numFiles 2
                 numRows 5
                 rawDataSize 15
                 serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
@@ -1036,7 +1036,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -1058,7 +1058,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -1229,7 +1229,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -1251,7 +1251,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -1456,7 +1456,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -1478,7 +1478,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -1522,7 +1522,7 @@ STAGE PLANS:
                           columns.types int:int:int
 #### A masked pattern was here ####
                           name default.outputtbl3
-                          numFiles 1
+                          numFiles 2
                           numRows 5
                           rawDataSize 25
                           serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}
@@ -1552,7 +1552,7 @@ STAGE PLANS:
                 columns.types int:int:int
 #### A masked pattern was here ####
                 name default.outputtbl3
-                numFiles 1
+                numFiles 2
                 numRows 5
                 rawDataSize 25
                 serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}
@@ -1729,7 +1729,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -1751,7 +1751,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -1795,7 +1795,7 @@ STAGE PLANS:
                           columns.types int:int
 #### A masked pattern was here ####
                           name default.outputtbl1
-                          numFiles 1
+                          numFiles 2
                           numRows 5
                           rawDataSize 15
                           serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
@@ -1825,7 +1825,7 @@ STAGE PLANS:
                 columns.types int:int
 #### A masked pattern was here ####
                 name default.outputtbl1
-                numFiles 1
+                numFiles 2
                 numRows 5
                 rawDataSize 15
                 serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
@@ -2023,7 +2023,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -2045,7 +2045,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -2127,7 +2127,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -2149,7 +2149,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -2368,7 +2368,7 @@ STAGE PLANS:
                                   columns.types int:int
 #### A masked pattern was here ####
                                   name default.outputtbl1
-                                  numFiles 2
+                                  numFiles 4
                                   numRows 10
                                   rawDataSize 30
                                   serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
@@ -2399,7 +2399,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -2421,7 +2421,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -2476,7 +2476,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -2498,7 +2498,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -2542,7 +2542,7 @@ STAGE PLANS:
                           columns.types int:int
 #### A masked pattern was here ####
                           name default.outputtbl1
-                          numFiles 2
+                          numFiles 4
                           numRows 10
                           rawDataSize 30
                           serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
@@ -2572,7 +2572,7 @@ STAGE PLANS:
                 columns.types int:int
 #### A masked pattern was here ####
                 name default.outputtbl1
-                numFiles 2
+                numFiles 4
                 numRows 10
                 rawDataSize 30
                 serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
@@ -2780,7 +2780,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -2802,7 +2802,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -2857,7 +2857,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -2879,7 +2879,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -2925,7 +2925,7 @@ STAGE PLANS:
                           columns.types int:int
 #### A masked pattern was here ####
                           name default.outputtbl1
-                          numFiles 3
+                          numFiles 4
                           numRows 10
                           rawDataSize 32
                           serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
@@ -2955,7 +2955,7 @@ STAGE PLANS:
                 columns.types int:int
 #### A masked pattern was here ####
                 name default.outputtbl1
-                numFiles 3
+                numFiles 4
                 numRows 10
                 rawDataSize 32
                 serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
@@ -3145,7 +3145,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -3167,7 +3167,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -3222,7 +3222,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -3244,7 +3244,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}

http://git-wip-us.apache.org/repos/asf/hive/blob/09e330d2/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
index 048507c..94aaab2 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
@@ -153,7 +153,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -175,7 +175,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -343,7 +343,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -365,7 +365,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -586,7 +586,7 @@ STAGE PLANS:
                                 columns.types int:int
 #### A masked pattern was here ####
                                 name default.outputtbl1
-                                numFiles 1
+                                numFiles 2
                                 numRows 5
                                 rawDataSize 15
                                 serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
@@ -617,7 +617,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -639,7 +639,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -669,7 +669,7 @@ STAGE PLANS:
                 columns.types int:int
 #### A masked pattern was here ####
                 name default.outputtbl1
-                numFiles 1
+                numFiles 2
                 numRows 5
                 rawDataSize 15
                 serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
@@ -809,7 +809,7 @@ STAGE PLANS:
                                 columns.types int:int
 #### A masked pattern was here ####
                                 name default.outputtbl1
-                                numFiles 1
+                                numFiles 2
                                 numRows 5
                                 rawDataSize 15
                                 serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
@@ -840,7 +840,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -862,7 +862,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -892,7 +892,7 @@ STAGE PLANS:
                 columns.types int:int
 #### A masked pattern was here ####
                 name default.outputtbl1
-                numFiles 1
+                numFiles 2
                 numRows 5
                 rawDataSize 15
                 serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
@@ -1054,7 +1054,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -1076,7 +1076,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -1248,7 +1248,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -1270,7 +1270,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -1493,7 +1493,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -1515,7 +1515,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -1576,7 +1576,7 @@ STAGE PLANS:
                           columns.types int:int:int
 #### A masked pattern was here ####
                           name default.outputtbl3
-                          numFiles 1
+                          numFiles 2
                           numRows 5
                           rawDataSize 25
                           serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}
@@ -1606,7 +1606,7 @@ STAGE PLANS:
                 columns.types int:int:int
 #### A masked pattern was here ####
                 name default.outputtbl3
-                numFiles 1
+                numFiles 2
                 numRows 5
                 rawDataSize 25
                 serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}
@@ -1784,7 +1784,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -1806,7 +1806,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -1867,7 +1867,7 @@ STAGE PLANS:
                           columns.types int:int
 #### A masked pattern was here ####
                           name default.outputtbl1
-                          numFiles 1
+                          numFiles 2
                           numRows 5
                           rawDataSize 15
                           serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
@@ -1897,7 +1897,7 @@ STAGE PLANS:
                 columns.types int:int
 #### A masked pattern was here ####
                 name default.outputtbl1
-                numFiles 1
+                numFiles 2
                 numRows 5
                 rawDataSize 15
                 serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
@@ -2095,7 +2095,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -2117,7 +2117,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -2199,7 +2199,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -2221,7 +2221,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -2441,7 +2441,7 @@ STAGE PLANS:
                                   columns.types int:int
 #### A masked pattern was here ####
                                   name default.outputtbl1
-                                  numFiles 2
+                                  numFiles 4
                                   numRows 10
                                   rawDataSize 30
                                   serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
@@ -2472,7 +2472,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -2494,7 +2494,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -2549,7 +2549,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -2571,7 +2571,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -2632,7 +2632,7 @@ STAGE PLANS:
                           columns.types int:int
 #### A masked pattern was here ####
                           name default.outputtbl1
-                          numFiles 2
+                          numFiles 4
                           numRows 10
                           rawDataSize 30
                           serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
@@ -2662,7 +2662,7 @@ STAGE PLANS:
                 columns.types int:int
 #### A masked pattern was here ####
                 name default.outputtbl1
-                numFiles 2
+                numFiles 4
                 numRows 10
                 rawDataSize 30
                 serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
@@ -2870,7 +2870,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -2892,7 +2892,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -2947,7 +2947,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -2969,7 +2969,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -3015,7 +3015,7 @@ STAGE PLANS:
                           columns.types int:int
 #### A masked pattern was here ####
                           name default.outputtbl1
-                          numFiles 3
+                          numFiles 4
                           numRows 10
                           rawDataSize 32
                           serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
@@ -3045,7 +3045,7 @@ STAGE PLANS:
                 columns.types int:int
 #### A masked pattern was here ####
                 name default.outputtbl1
-                numFiles 3
+                numFiles 4
                 numRows 10
                 rawDataSize 32
                 serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
@@ -3236,7 +3236,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -3258,7 +3258,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}
@@ -3313,7 +3313,7 @@ STAGE PLANS:
                     columns.types string:string
 #### A masked pattern was here ####
                     name default.t1
-                    numFiles 1
+                    numFiles 2
                     numRows 6
                     rawDataSize 24
                     serialization.ddl struct t1 { string key, string val}
@@ -3335,7 +3335,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.t1
-                      numFiles 1
+                      numFiles 2
                       numRows 6
                       rawDataSize 24
                       serialization.ddl struct t1 { string key, string val}