You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2014/03/13 23:51:10 UTC
svn commit: r1577364 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java
test/queries/clientpositive/lb_fs_stats.q
test/results/clientpositive/lb_fs_stats.q.out
Author: hashutosh
Date: Thu Mar 13 22:51:09 2014
New Revision: 1577364
URL: http://svn.apache.org/r1577364
Log:
HIVE-6630 : FS based stats collection have issues for list bucketing case (Ashutosh Chauhan via Gunther Hagleitner)
Added:
hive/trunk/ql/src/test/queries/clientpositive/lb_fs_stats.q
hive/trunk/ql/src/test/results/clientpositive/lb_fs_stats.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java?rev=1577364&r1=1577363&r2=1577364&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java Thu Mar 13 22:51:09 2014
@@ -914,11 +914,10 @@ public class FileSinkOperator extends Te
String lbSpec = split[1];
String prefix;
- String postfix;
+ String postfix=null;
if (taskIndependent) {
// key = "database.table/SP/DP/"LB/
prefix = conf.getTableInfo().getTableName();
- postfix = Utilities.join(lbSpec);
} else {
// key = "prefix/SP/DP/"LB/taskID/
prefix = conf.getStatsAggPrefix();
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java?rev=1577364&r1=1577363&r2=1577364&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java Thu Mar 13 22:51:09 2014
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.stats.
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
+import java.util.Map.Entry;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -71,7 +72,15 @@ public class FSStatsPublisher implements
public boolean publishStat(String partKV, Map<String, String> stats) {
LOG.debug("Putting in map : " + partKV + "\t" + stats);
// we need to do new hashmap, since stats object is reused across calls.
- statsMap.put(partKV, new HashMap<String, String>(stats));
+ Map<String,String> cpy = new HashMap<String, String>(stats);
+ Map<String,String> statMap = statsMap.get(partKV);
+ if (null != statMap) {
+ // In case of LB, we might get called repeatedly.
+ for (Entry<String, String> e : statMap.entrySet()) {
+ cpy.put(e.getKey(), String.valueOf(Long.valueOf(e.getValue()) + Long.valueOf(cpy.get(e.getKey()))));
+ }
+ }
+ statsMap.put(partKV, cpy);
return true;
}
Added: hive/trunk/ql/src/test/queries/clientpositive/lb_fs_stats.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/lb_fs_stats.q?rev=1577364&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/lb_fs_stats.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/lb_fs_stats.q Thu Mar 13 22:51:09 2014
@@ -0,0 +1,19 @@
+set hive.mapred.supports.subdirectories=true;
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set mapred.input.dir.recursive=true;
+set hive.stats.dbclass=fs;
+-- Tests FS-based stats collection (hive.stats.dbclass=fs) for a list bucketing table
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+CREATE TABLE test_tab (key STRING, value STRING) PARTITIONED BY (part STRING) STORED AS RCFILE;
+
+ALTER TABLE test_tab SKEWED BY (key) ON ("484") STORED AS DIRECTORIES;
+
+INSERT OVERWRITE TABLE test_tab PARTITION (part = '1') SELECT * FROM src;
+
+describe formatted test_tab partition (part='1');
+
+set hive.stats.dbclass=jdbc:derby;
Added: hive/trunk/ql/src/test/results/clientpositive/lb_fs_stats.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/lb_fs_stats.q.out?rev=1577364&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/lb_fs_stats.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/lb_fs_stats.q.out Thu Mar 13 22:51:09 2014
@@ -0,0 +1,79 @@
+PREHOOK: query: -- Tests truncating a column from a list bucketing table
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+CREATE TABLE test_tab (key STRING, value STRING) PARTITIONED BY (part STRING) STORED AS RCFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: -- Tests truncating a column from a list bucketing table
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23)
+
+CREATE TABLE test_tab (key STRING, value STRING) PARTITIONED BY (part STRING) STORED AS RCFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_tab
+PREHOOK: query: ALTER TABLE test_tab SKEWED BY (key) ON ("484") STORED AS DIRECTORIES
+PREHOOK: type: ALTERTABLE_SKEWED
+PREHOOK: Input: default@test_tab
+PREHOOK: Output: default@test_tab
+POSTHOOK: query: ALTER TABLE test_tab SKEWED BY (key) ON ("484") STORED AS DIRECTORIES
+POSTHOOK: type: ALTERTABLE_SKEWED
+POSTHOOK: Input: default@test_tab
+POSTHOOK: Output: default@test_tab
+PREHOOK: query: INSERT OVERWRITE TABLE test_tab PARTITION (part = '1') SELECT * FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_tab@part=1
+POSTHOOK: query: INSERT OVERWRITE TABLE test_tab PARTITION (part = '1') SELECT * FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_tab@part=1
+POSTHOOK: Lineage: test_tab PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_tab PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: describe formatted test_tab partition (part='1')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe formatted test_tab partition (part='1')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: test_tab PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_tab PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+# col_name data_type comment
+
+key string None
+value string None
+
+# Partition Information
+# col_name data_type comment
+
+part string None
+
+# Detailed Partition Information
+Partition Value: [1]
+Database: default
+Table: test_tab
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 2
+ numRows 500
+ rawDataSize 4812
+ totalSize 5370
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Stored As SubDirectories: Yes
+Skewed Columns: [key]
+Skewed Values: [[484]]
+#### A masked pattern was here ####
+Skewed Value to Truncated Path: {[484]=/test_tab/part=1/key=484}
+Storage Desc Params:
+ serialization.format 1