You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2014/03/05 00:41:03 UTC

svn commit: r1574255 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/stats/fs/ test/queries/clientpositive/

Author: hashutosh
Date: Tue Mar  4 23:41:02 2014
New Revision: 1574255

URL: http://svn.apache.org/r1574255
Log:
HIVE-6539 : Couple of issues in fs based stats collection (Ashutosh Chauhan via Gunther Hagleitner)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsAggregator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java
    hive/trunk/ql/src/test/queries/clientpositive/metadata_only_queries.q
    hive/trunk/ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q
    hive/trunk/ql/src/test/queries/clientpositive/stats_only_null.q

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsAggregator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsAggregator.java?rev=1574255&r1=1574254&r2=1574255&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsAggregator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsAggregator.java Tue Mar  4 23:41:02 2014
@@ -80,9 +80,17 @@ public class FSStatsAggregator implement
   @Override
   public String aggregateStats(String partID, String statType) {
     long counter = 0;
+    LOG.debug("Part ID: " + partID + "\t" + statType);
     for (Map<String,Map<String,String>> statsMap : statsList) {
-      String statVal = statsMap.get(partID).get(statType);
-      counter += Long.valueOf(statVal == null ? "0" : statVal);
+      Map<String,String> partStat = statsMap.get(partID);
+      if (null == partStat) { // not all partitions are scanned in all mappers, so this could be null.
+        continue;
+      }
+      String statVal = partStat.get(statType);
+      if (null == statVal) { // partition was found, but it has no value for this stat type.
+        continue;
+      }
+      counter += Long.valueOf(statVal);
     }
     LOG.info("Read stats for : " + partID + "\t" + statType + "\t" + counter);
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java?rev=1574255&r1=1574254&r2=1574255&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java Tue Mar  4 23:41:02 2014
@@ -69,7 +69,9 @@ public class FSStatsPublisher implements
 
   @Override
   public boolean publishStat(String partKV, Map<String, String> stats) {
-    statsMap.put(partKV, stats);
+    LOG.debug("Putting in map : " + partKV + "\t" + stats);
+    // We need to copy into a new HashMap, since the stats object is reused across calls.
+    statsMap.put(partKV, new HashMap<String, String>(stats));
     return true;
   }
 
@@ -81,6 +83,7 @@ public class FSStatsPublisher implements
       LOG.debug("About to create stats file for this task : " + statsFile);
       Output output = new Output(statsFile.getFileSystem(conf).create(statsFile,true));
       LOG.info("Created file : " + statsFile);
+      LOG.info("Writing stats in it : " + statsMap);
       Utilities.runtimeSerializationKryo.get().writeObject(output, statsMap);
       output.close();
       return true;

Modified: hive/trunk/ql/src/test/queries/clientpositive/metadata_only_queries.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/metadata_only_queries.q?rev=1574255&r1=1574254&r2=1574255&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/metadata_only_queries.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/metadata_only_queries.q Tue Mar  4 23:41:02 2014
@@ -1,3 +1,4 @@
+set hive.stats.dbclass=fs;
 set hive.compute.query.using.stats=true;
 set hive.stats.autogather=true;
 create table over10k(
@@ -73,3 +74,4 @@ drop table stats_tbl;
 drop table stats_tbl_part;
 
 set hive.compute.query.using.stats=false;
+set hive.stats.dbclass=jdbc:derby;

Modified: hive/trunk/ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q?rev=1574255&r1=1574254&r2=1574255&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q Tue Mar  4 23:41:02 2014
@@ -1,3 +1,4 @@
+set hive.stats.dbclass=fs;
 set hive.compute.query.using.stats=true;
 create table over10k(
            t tinyint,
@@ -47,3 +48,4 @@ select count(*), count(1), sum(1), sum(2
 
 drop table stats_tbl_part;
 set hive.compute.query.using.stats=false;
+set hive.stats.dbclass=jdbc:derby;

Modified: hive/trunk/ql/src/test/queries/clientpositive/stats_only_null.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/stats_only_null.q?rev=1574255&r1=1574254&r2=1574255&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/stats_only_null.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/stats_only_null.q Tue Mar  4 23:41:02 2014
@@ -1,3 +1,4 @@
+set hive.stats.dbclass=fs;
 set hive.compute.query.using.stats=true;
 set hive.stats.autogather=true;
 CREATE TABLE temps_null(a double, b int, c STRING, d smallint) STORED AS TEXTFILE; 
@@ -37,3 +38,4 @@ drop table stats_null;
 drop table stats_null_part;
 drop table temps_null;
 set hive.compute.query.using.stats=false;
+set hive.stats.dbclass=jdbc:derby;