Posted to commits@hive.apache.org by br...@apache.org on 2013/08/20 18:45:09 UTC

svn commit: r1515865 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/ test/queries/clientpositive/ test/results/clientpositive/

Author: brock
Date: Tue Aug 20 16:45:08 2013
New Revision: 1515865

URL: http://svn.apache.org/r1515865
Log:
HIVE-4645: Stat information like numFiles and totalSize is not correct when a sub-directory exists (Navis via Brock Noland)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java
    hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_7.q
    hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_8.q
    hive/trunk/ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out
    hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out
    hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out
    hive/trunk/ql/src/test/results/clientpositive/stats_noscan_2.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java?rev=1515865&r1=1515864&r2=1515865&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java Tue Aug 20 16:45:08 2013
@@ -19,12 +19,16 @@
 
 package org.apache.hadoop.hive.ql.exec;
 
+import java.io.FileNotFoundException;
+import java.io.IOException;
 import java.io.Serializable;
+import java.net.URI;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.regex.Pattern;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -34,12 +38,12 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.Warehouse;
-import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.DriverContext;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPrunerUtils;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec;
 import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
 import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
@@ -271,8 +275,6 @@ public class StatsTask extends Task<Stat
     try {
       // Stats setup:
       Warehouse wh = new Warehouse(conf);
-      FileSystem fileSys;
-      FileStatus[] fileStatus;
 
       if (!this.getWork().getNoStatsAggregator()) {
         String statsImplementationClass = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS);
@@ -322,16 +324,9 @@ public class StatsTask extends Task<Stat
         if (!tableStatsExist && atomic) {
           return 0;
         }
-        Path tablePath = wh.getTablePath(db.getDatabase(table.getDbName()), table.getTableName());
-        fileSys = tablePath.getFileSystem(conf);
-        fileStatus = Utilities.getFileStatusRecurse(tablePath, 1, fileSys);
-
-        tblStats.setStat(StatsSetupConst.NUM_FILES, fileStatus.length);
-        long tableSize = 0L;
-        for (int i = 0; i < fileStatus.length; i++) {
-          tableSize += fileStatus[i].getLen();
-        }
-        tblStats.setStat(StatsSetupConst.TOTAL_SIZE, tableSize);
+        long[] summary = summary(conf, table);
+        tblStats.setStat(StatsSetupConst.NUM_FILES, summary[0]);
+        tblStats.setStat(StatsSetupConst.TOTAL_SIZE, summary[1]);
 
         // In case of a non-partitioned table, the key for stats temporary store is "rootDir"
         if (statsAggregator != null) {
@@ -403,18 +398,9 @@ public class StatsTask extends Task<Stat
             }
           }
 
-          fileSys = partn.getPartitionPath().getFileSystem(conf);
-          /* consider sub-directory created from list bucketing. */
-          int listBucketingDepth = calculateListBucketingDMLDepth(partn);
-          fileStatus = Utilities.getFileStatusRecurse(partn.getPartitionPath(),
-              (1 + listBucketingDepth), fileSys);
-          newPartStats.setStat(StatsSetupConst.NUM_FILES, fileStatus.length);
-
-          long partitionSize = 0L;
-          for (int i = 0; i < fileStatus.length; i++) {
-            partitionSize += fileStatus[i].getLen();
-          }
-          newPartStats.setStat(StatsSetupConst.TOTAL_SIZE, partitionSize);
+          long[] summary = summary(conf, partn);
+          newPartStats.setStat(StatsSetupConst.NUM_FILES, summary[0]);
+          newPartStats.setStat(StatsSetupConst.TOTAL_SIZE, summary[1]);
 
           if (hasStats) {
             PartitionStatistics oldPartStats = new PartitionStatistics(currentValues);
@@ -478,26 +464,103 @@ public class StatsTask extends Task<Stat
     return ret;
   }
 
-  /**
-   * List bucketing will introduce sub-directories.
-   *
-   * calculate it here in order to go to the leaf directory
-   *
-   * so that we can count right number of files.
-   *
-   * @param partn
-   * @return
-   */
-  private int calculateListBucketingDMLDepth(Partition partn) {
-    // list bucketing will introduce more files
-    int listBucketingDepth = 0;
-    if ((partn.getSkewedColNames() != null) && (partn.getSkewedColNames().size() > 0)
-        && (partn.getSkewedColValues() != null) && (partn.getSkewedColValues().size() > 0)
-        && (partn.getSkewedColValueLocationMaps() != null)
-        && (partn.getSkewedColValueLocationMaps().size() > 0)) {
-      listBucketingDepth = partn.getSkewedColNames().size();
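+  /**
+   * Computes {numFiles, totalSize} for a partition. The partition's skew
+   * metadata decides how deep to look: no skewed columns means a flat
+   * listing of the partition directory; skewed column names without values
+   * means all rows landed in the default list-bucketing directory; both
+   * present means one directory level per skewed column with the data
+   * files underneath.
+   */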
+  private long[] summary(HiveConf conf, Partition partn) throws IOException {
+    Path path = partn.getPartitionPath();
+    FileSystem fs = path.getFileSystem(conf);
+    List<String> skewedColNames = partn.getSkewedColNames();
+    if (skewedColNames == null || skewedColNames.isEmpty()) {
+      return summary(fs, path);
+    }
+    List<List<String>> skewColValues = partn.getSkewedColValues();
+    if (skewColValues == null || skewColValues.isEmpty()) {
+      return summary(fs, toDefaultLBPath(path));
+    }
+    return summary(fs, path, skewedColNames);
+  }
+
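+  // Same decision tree as the partition variant above, driven by the
+  // table-level skew metadata.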
+  private long[] summary(HiveConf conf, Table table) throws IOException {
+    Path path = table.getPath();
+    FileSystem fs = path.getFileSystem(conf);
+    List<String> skewedColNames = table.getSkewedColNames();
+    if (skewedColNames == null || skewedColNames.isEmpty()) {
+      return summary(fs, path);
+    }
+    List<List<String>> skewColValues = table.getSkewedColValues();
+    if (skewColValues == null || skewColValues.isEmpty()) {
+      return summary(fs, toDefaultLBPath(path));
+    }
+    return summary(fs, path, skewedColNames);
+  }
+
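+  // Rows whose skewed-column values are not listed explicitly are written
+  // under the default list-bucketing directory.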
+  private Path toDefaultLBPath(Path path) {
+    return new Path(path, ListBucketingPrunerUtils.HIVE_LIST_BUCKETING_DEFAULT_DIR_NAME);
+  }
+
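+  // Flat summary: a plain file counts as one file of its own length; for a
+  // directory, only its immediate non-directory children are counted. A
+  // missing path or an empty listing yields {0, 0}.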
+  private long[] summary(FileSystem fs, Path path) throws IOException {
+    try {
+      FileStatus status = fs.getFileStatus(path);
+      if (!status.isDir()) {
+        return new long[] {1, status.getLen()};
+      }
+    } catch (FileNotFoundException e) {
+      return new long[] {0, 0};
+    }
+    FileStatus[] children = fs.listStatus(path);  // can be null
+    if (children == null) {
+      return new long[] {0, 0};
+    }
+    long numFiles = 0L;
+    long tableSize = 0L;
+    for (FileStatus child : children) {
+      if (!child.isDir()) {
+        tableSize += child.getLen();
+        numFiles++;
+      }
+    }
+    return new long[] {numFiles, tableSize};
+  }
+
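+  // Builds a regex matching relative paths exactly one level below a full
+  // chain of skewed-column directories, e.g. for columns [x, y]
+  // (hypothetical names): x=[^/]*/y=[^/]*/[^/]*$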
+  private Pattern toPattern(List<String> skewCols) {
+    StringBuilder builder = new StringBuilder();
+    for (String skewCol : skewCols) {
+      if (builder.length() > 0) {
+        builder.append(Path.SEPARATOR_CHAR);
+      }
+      builder.append(skewCol).append('=');
+      builder.append("[^").append(Path.SEPARATOR_CHAR).append("]*");
+    }
+    builder.append(Path.SEPARATOR_CHAR);
+    builder.append("[^").append(Path.SEPARATOR_CHAR).append("]*$");
+    return Pattern.compile(builder.toString());
+  }
+
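+  // Skew-aware summary: counts only data files that sit either under the
+  // default list-bucketing directory or at the leaf of the skew directory
+  // chain; directories and files at any other depth are ignored.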
+  private long[] summary(FileSystem fs, Path path, List<String> skewCols) throws IOException {
+    long numFiles = 0L;
+    long tableSize = 0L;
+    Pattern pattern = toPattern(skewCols);
+    for (FileStatus status : Utilities.getFileStatusRecurse(path, skewCols.size() + 1, fs)) {
+      if (status.isDir()) {
+        continue;
+      }
+      String relative = toRelativePath(path, status.getPath());
+      if (relative == null) {
+        continue;
+      }
+      if (relative.startsWith(ListBucketingPrunerUtils.HIVE_LIST_BUCKETING_DEFAULT_DIR_NAME) ||
+        pattern.matcher(relative).matches()) {
+        tableSize += status.getLen();
+        numFiles++;
+      }
+    }
+    return new long[] {numFiles, tableSize};
+  }
+
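+  // URI.relativize() returns its argument unchanged when path2 is not
+  // under path1, so reference equality is enough to detect that case.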
+  private String toRelativePath(Path path1, Path path2) {
+    URI relative = path1.toUri().relativize(path2.toUri());
+    if (relative == path2.toUri()) {
+      return null;
     }
-    return listBucketingDepth;
+    return relative.getPath();
   }
 
   private boolean existStats(Map<String, String> parameters) {

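As an aside, here is a quick standalone sketch (not part of the patch) of what the pattern built by toPattern() accepts; the skewed-column names x and y are made up for illustration, and '/' is hardcoded in place of Path.SEPARATOR_CHAR:

    import java.util.Arrays;
    import java.util.List;
    import java.util.regex.Pattern;

    public class SkewPatternSketch {
      // Mirrors the toPattern() logic above: one "col=value" segment per
      // skewed column, then exactly one trailing file-name segment.
      static Pattern toPattern(List<String> skewCols) {
        StringBuilder builder = new StringBuilder();
        for (String skewCol : skewCols) {
          if (builder.length() > 0) {
            builder.append('/');
          }
          builder.append(skewCol).append("=[^/]*");
        }
        builder.append("/[^/]*$");
        return Pattern.compile(builder.toString());
      }

      public static void main(String[] args) {
        Pattern p = toPattern(Arrays.asList("x", "y"));
        System.out.println(p.pattern());                             // x=[^/]*/y=[^/]*/[^/]*$
        System.out.println(p.matcher("x=1/y=2/000000_0").matches()); // true: leaf data file
        System.out.println(p.matcher("x=1/000000_0").matches());     // false: wrong depth
      }
    }

Since the pattern is applied to paths relativized against the partition (or table) root, a match implies the file sits exactly one level below the full skew directory chain.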
Modified: hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_7.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_7.q?rev=1515865&r1=1515864&r2=1515865&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_7.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_7.q Tue Aug 20 16:45:08 2013
@@ -64,7 +64,7 @@ set hive.input.format=org.apache.hadoop.
 explain extended
 select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484';
 select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484';
-select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484';
+select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' order by hr;
 
 -- clean up
 drop table list_bucketing_dynamic_part;

Modified: hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_8.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_8.q?rev=1515865&r1=1515864&r2=1515865&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_8.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/list_bucket_dml_8.q Tue Aug 20 16:45:08 2013
@@ -81,7 +81,7 @@ select count(*) from list_bucketing_dyna
 explain extended
 select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484';
 select * from list_bucketing_dynamic_part where key = '484' and value = 'val_484';
-select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484';
+select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' order by hr;
 
 -- clean up
 drop table list_bucketing_dynamic_part;

Modified: hive/trunk/ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out?rev=1515865&r1=1515864&r2=1515865&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out Tue Aug 20 16:45:08 2013
@@ -140,7 +140,7 @@ Partition Parameters:	 	 
 	numFiles            	1                   
 	numRows             	309                 
 	rawDataSize         	1482                
-	totalSize           	136                 
+	totalSize           	1791                
 #### A masked pattern was here ####
 	 	 
 # Storage Information	 	 

Modified: hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out?rev=1515865&r1=1515864&r2=1515865&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out Tue Aug 20 16:45:08 2013
@@ -290,10 +290,10 @@ Table:              	list_bucketing_dyna
 Protect Mode:       	None                	 
 #### A masked pattern was here ####
 Partition Parameters:	 	 
-	numFiles            	1                   
+	numFiles            	2                   
 	numRows             	16                  
 	rawDataSize         	136                 
-	totalSize           	4096                
+	totalSize           	310                 
 #### A masked pattern was here ####
 	 	 
 # Storage Information	 	 
@@ -417,7 +417,7 @@ STAGE PLANS:
                       columns.types string:string
 #### A masked pattern was here ####
                       name default.list_bucketing_dynamic_part
-                      numFiles 5
+                      numFiles 6
                       numPartitions 2
                       numRows 1000
                       partition_columns hr
@@ -425,7 +425,7 @@ STAGE PLANS:
                       serialization.ddl struct list_bucketing_dynamic_part { string key, string value}
                       serialization.format 1
                       serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                      totalSize 14672
+                      totalSize 10886
 #### A masked pattern was here ####
                     serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
                     name: default.list_bucketing_dynamic_part
@@ -557,7 +557,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.list_bucketing_dynamic_part
-                numFiles 5
+                numFiles 6
                 numPartitions 2
                 numRows 1000
                 partition_columns hr
@@ -565,7 +565,7 @@ STAGE PLANS:
                 serialization.ddl struct list_bucketing_dynamic_part { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                totalSize 14672
+                totalSize 10886
 #### A masked pattern was here ####
               serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
               name: default.list_bucketing_dynamic_part
@@ -590,7 +590,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.list_bucketing_dynamic_part
-              numFiles 5
+              numFiles 6
               numPartitions 2
               numRows 1000
               partition_columns hr
@@ -598,7 +598,7 @@ STAGE PLANS:
               serialization.ddl struct list_bucketing_dynamic_part { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              totalSize 14672
+              totalSize 10886
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
           
@@ -610,7 +610,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.list_bucketing_dynamic_part
-                numFiles 5
+                numFiles 6
                 numPartitions 2
                 numRows 1000
                 partition_columns hr
@@ -618,7 +618,7 @@ STAGE PLANS:
                 serialization.ddl struct list_bucketing_dynamic_part { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                totalSize 14672
+                totalSize 10886
 #### A masked pattern was here ####
               serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
               name: default.list_bucketing_dynamic_part
@@ -641,7 +641,7 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.list_bucketing_dynamic_part
-              numFiles 5
+              numFiles 6
               numPartitions 2
               numRows 1000
               partition_columns hr
@@ -649,7 +649,7 @@ STAGE PLANS:
               serialization.ddl struct list_bucketing_dynamic_part { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              totalSize 14672
+              totalSize 10886
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
           
@@ -661,7 +661,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.list_bucketing_dynamic_part
-                numFiles 5
+                numFiles 6
                 numPartitions 2
                 numRows 1000
                 partition_columns hr
@@ -669,7 +669,7 @@ STAGE PLANS:
                 serialization.ddl struct list_bucketing_dynamic_part { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                totalSize 14672
+                totalSize 10886
 #### A masked pattern was here ####
               serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
               name: default.list_bucketing_dynamic_part
@@ -757,7 +757,7 @@ Partition Parameters:	 	 
 	numFiles            	1                   
 	numRows             	16                  
 	rawDataSize         	136                 
-	totalSize           	4096                
+	totalSize           	254                 
 #### A masked pattern was here ####
 	 	 
 # Storage Information	 	 
@@ -954,7 +954,7 @@ STAGE PLANS:
               serialization.ddl struct list_bucketing_dynamic_part { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              totalSize 4096
+              totalSize 254
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
           
@@ -974,7 +974,7 @@ STAGE PLANS:
                 serialization.ddl struct list_bucketing_dynamic_part { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                totalSize 14616
+                totalSize 10774
 #### A masked pattern was here ####
               serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
               name: default.list_bucketing_dynamic_part
@@ -1020,7 +1020,7 @@ STAGE PLANS:
                 serialization.ddl struct list_bucketing_dynamic_part { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                totalSize 14616
+                totalSize 10774
 #### A masked pattern was here ####
               serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
               name: default.list_bucketing_dynamic_part
@@ -1056,13 +1056,13 @@ POSTHOOK: Lineage: list_bucketing_dynami
 POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
 484	val_484	2008-04-08	b1
 484	val_484	2008-04-08	b1
-PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484'
+PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' order by hr
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
-POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484'
+POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' order by hr
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -1076,8 +1076,8 @@ POSTHOOK: Lineage: list_bucketing_dynami
 POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
 POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
-484	val_484	2008-04-08	12
 484	val_484	2008-04-08	11
+484	val_484	2008-04-08	12
 PREHOOK: query: -- clean up
 drop table list_bucketing_dynamic_part
 PREHOOK: type: DROPTABLE

Modified: hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out?rev=1515865&r1=1515864&r2=1515865&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out Tue Aug 20 16:45:08 2013
@@ -346,10 +346,10 @@ Table:              	list_bucketing_dyna
 Protect Mode:       	None                	 
 #### A masked pattern was here ####
 Partition Parameters:	 	 
-	numFiles            	1                   
+	numFiles            	2                   
 	numRows             	16                  
 	rawDataSize         	136                 
-	totalSize           	4096                
+	totalSize           	310                 
 #### A masked pattern was here ####
 	 	 
 # Storage Information	 	 
@@ -585,14 +585,14 @@ STAGE PLANS:
               columns.types string:string
 #### A masked pattern was here ####
               name default.list_bucketing_dynamic_part
-              numFiles 1
+              numFiles 2
               numRows 16
               partition_columns ds/hr
               rawDataSize 136
               serialization.ddl struct list_bucketing_dynamic_part { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-              totalSize 4096
+              totalSize 310
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
           
@@ -604,7 +604,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.list_bucketing_dynamic_part
-                numFiles 4
+                numFiles 5
                 numPartitions 2
                 numRows 1000
                 partition_columns ds/hr
@@ -612,7 +612,7 @@ STAGE PLANS:
                 serialization.ddl struct list_bucketing_dynamic_part { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                totalSize 14682
+                totalSize 10896
 #### A masked pattern was here ####
               serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
               name: default.list_bucketing_dynamic_part
@@ -650,7 +650,7 @@ STAGE PLANS:
                 columns.types string:string
 #### A masked pattern was here ####
                 name default.list_bucketing_dynamic_part
-                numFiles 4
+                numFiles 5
                 numPartitions 2
                 numRows 1000
                 partition_columns ds/hr
@@ -658,7 +658,7 @@ STAGE PLANS:
                 serialization.ddl struct list_bucketing_dynamic_part { string key, string value}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                totalSize 14682
+                totalSize 10896
 #### A masked pattern was here ####
               serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
               name: default.list_bucketing_dynamic_part
@@ -690,13 +690,13 @@ POSTHOOK: Lineage: list_bucketing_dynami
 POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
 484	val_484	2008-04-08	b1
 484	val_484	2008-04-08	b1
-PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484'
+PREHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' order by hr
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
-POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484'
+POSTHOOK: query: select * from srcpart where ds = '2008-04-08' and key = '484' and value = 'val_484' order by hr
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -706,8 +706,8 @@ POSTHOOK: Lineage: list_bucketing_dynami
 POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=a1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
 POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: list_bucketing_dynamic_part PARTITION(ds=2008-04-08,hr=b1).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
-484	val_484	2008-04-08	12
 484	val_484	2008-04-08	11
+484	val_484	2008-04-08	12
 PREHOOK: query: -- clean up
 drop table list_bucketing_dynamic_part
 PREHOOK: type: DROPTABLE

Modified: hive/trunk/ql/src/test/results/clientpositive/stats_noscan_2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/stats_noscan_2.q.out?rev=1515865&r1=1515864&r2=1515865&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/stats_noscan_2.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/stats_noscan_2.q.out Tue Aug 20 16:45:08 2013
@@ -46,11 +46,11 @@ Retention:          	0                  
 Table Type:         	EXTERNAL_TABLE      	 
 Table Parameters:	 	 
 	EXTERNAL            	TRUE                
-	numFiles            	0                   
+	numFiles            	1                   
 	numPartitions       	0                   
 	numRows             	6                   
 	rawDataSize         	6                   
-	totalSize           	0                   
+	totalSize           	11                  
 #### A masked pattern was here ####
 	 	 
 # Storage Information	 	 
@@ -88,11 +88,11 @@ Retention:          	0                  
 Table Type:         	EXTERNAL_TABLE      	 
 Table Parameters:	 	 
 	EXTERNAL            	TRUE                
-	numFiles            	0                   
+	numFiles            	1                   
 	numPartitions       	0                   
 	numRows             	0                   
 	rawDataSize         	0                   
-	totalSize           	0                   
+	totalSize           	11                  
 #### A masked pattern was here ####
 	 	 
 # Storage Information