You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/12/05 15:31:08 UTC
svn commit: r1643294 - in /hive/branches/spark:
itests/src/test/resources/testconfiguration.properties
ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
ql/src/test/results/clientpositive/spark/stats_counter_partitioned.q.out
Author: xuefu
Date: Fri Dec 5 14:31:07 2014
New Revision: 1643294
URL: http://svn.apache.org/r1643294
Log:
HIVE-8783: Create some tests that use Spark counter for stats collection [Spark Branch] (Chengxiang via Xuefu)
Added:
hive/branches/spark/ql/src/test/results/clientpositive/spark/stats_counter_partitioned.q.out
Modified:
hive/branches/spark/itests/src/test/resources/testconfiguration.properties
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
Modified: hive/branches/spark/itests/src/test/resources/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/branches/spark/itests/src/test/resources/testconfiguration.properties?rev=1643294&r1=1643293&r2=1643294&view=diff
==============================================================================
--- hive/branches/spark/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/branches/spark/itests/src/test/resources/testconfiguration.properties Fri Dec 5 14:31:07 2014
@@ -828,6 +828,7 @@ spark.query.files=add_part_multiple.q, \
sort_merge_join_desc_8.q, \
spark_test.q, \
stats_counter.q, \
+ stats_counter_partitioned.q, \
stats_noscan_1.q, \
stats_noscan_2.q, \
stats_only_null.q, \
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java?rev=1643294&r1=1643293&r2=1643294&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkTask.java Fri Dec 5 14:31:07 2014
@@ -262,14 +262,14 @@ public class SparkTask extends Task<Spar
List<String> prefixs = new LinkedList<String>();
StatsWork statsWork = statsTask.getWork();
String tablePrefix = getTablePrefix(statsWork);
- List<Partition> partitions = getPartitionsList(statsWork);
+ List<Map<String, String>> partitionSpecs = getPartitionSpecs(statsWork);
int maxPrefixLength = StatsFactory.getMaxPrefixLength(conf);
- if (partitions == null) {
+ if (partitionSpecs == null) {
prefixs.add(Utilities.getHashedStatsPrefix(tablePrefix, maxPrefixLength));
} else {
- for (Partition partition : partitions) {
- String prefixWithPartition = Utilities.join(tablePrefix, Warehouse.makePartPath(partition.getSpec()));
+ for (Map<String, String> partitionSpec : partitionSpecs) {
+ String prefixWithPartition = Utilities.join(tablePrefix, Warehouse.makePartPath(partitionSpec));
prefixs.add(Utilities.getHashedStatsPrefix(prefixWithPartition, maxPrefixLength));
}
}
@@ -319,12 +319,12 @@ public class SparkTask extends Task<Spar
return null;
}
- private List<Partition> getPartitionsList(StatsWork work) throws HiveException {
+ private List<Map<String, String>> getPartitionSpecs(StatsWork work) throws HiveException {
if (work.getLoadFileDesc() != null) {
return null; //we are in CTAS, so we know there are no partitions
}
Table table;
- List<Partition> list = new ArrayList<Partition>();
+ List<Map<String, String>> partitionSpecs = new ArrayList<Map<String, String>>();
if (work.getTableSpecs() != null) {
@@ -337,8 +337,8 @@ public class SparkTask extends Task<Spar
// get all partitions that matches with the partition spec
List<Partition> partitions = tblSpec.partitions;
if (partitions != null) {
- for (Partition partn : partitions) {
- list.add(partn);
+ for (Partition partition : partitions) {
+ partitionSpecs.add(partition.getSpec());
}
}
} else if (work.getLoadTableDesc() != null) {
@@ -353,11 +353,10 @@ public class SparkTask extends Task<Spar
if (dpCtx != null && dpCtx.getNumDPCols() > 0) { // dynamic partitions
// we could not get dynamic partition information before SparkTask execution.
} else { // static partition
- Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false);
- list.add(partn);
+ partitionSpecs.add(tbd.getPartitionSpec());
}
}
- return list;
+ return partitionSpecs;
}
private Map<String, List<String>> getOperatorCounters() {
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/stats_counter_partitioned.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/stats_counter_partitioned.q.out?rev=1643294&view=auto
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/stats_counter_partitioned.q.out (added)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/stats_counter_partitioned.q.out Fri Dec 5 14:31:07 2014
@@ -0,0 +1,481 @@
+PREHOOK: query: -- partitioned table analyze
+
+create table dummy (key string, value string) partitioned by (ds string, hr string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dummy
+POSTHOOK: query: -- partitioned table analyze
+
+create table dummy (key string, value string) partitioned by (ds string, hr string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dummy
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='12')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@dummy
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='12')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@dummy
+POSTHOOK: Output: default@dummy@ds=2008/hr=12
+PREHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='11')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@dummy
+POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' into table dummy partition (ds='2008',hr='11')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@dummy
+POSTHOOK: Output: default@dummy@ds=2008/hr=11
+PREHOOK: query: analyze table dummy partition (ds,hr) compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dummy
+PREHOOK: Input: default@dummy@ds=2008/hr=11
+PREHOOK: Input: default@dummy@ds=2008/hr=12
+PREHOOK: Output: default@dummy
+PREHOOK: Output: default@dummy@ds=2008/hr=11
+PREHOOK: Output: default@dummy@ds=2008/hr=12
+POSTHOOK: query: analyze table dummy partition (ds,hr) compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dummy
+POSTHOOK: Input: default@dummy@ds=2008/hr=11
+POSTHOOK: Input: default@dummy@ds=2008/hr=12
+POSTHOOK: Output: default@dummy
+POSTHOOK: Output: default@dummy@ds=2008/hr=11
+POSTHOOK: Output: default@dummy@ds=2008/hr=12
+PREHOOK: query: describe formatted dummy partition (ds='2008', hr='11')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@dummy
+POSTHOOK: query: describe formatted dummy partition (ds='2008', hr='11')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@dummy
+# col_name data_type comment
+
+key string
+value string
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Partition Information
+Partition Value: [2008, 11]
+Database: default
+Table: dummy
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: describe formatted dummy partition (ds='2008', hr='12')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@dummy
+POSTHOOK: query: describe formatted dummy partition (ds='2008', hr='12')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@dummy
+# col_name data_type comment
+
+key string
+value string
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Partition Information
+Partition Value: [2008, 12]
+Database: default
+Table: dummy
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: drop table dummy
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dummy
+PREHOOK: Output: default@dummy
+POSTHOOK: query: drop table dummy
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dummy
+POSTHOOK: Output: default@dummy
+PREHOOK: query: -- static partitioned table on insert
+
+create table dummy (key string, value string) partitioned by (ds string, hr string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dummy
+POSTHOOK: query: -- static partitioned table on insert
+
+create table dummy (key string, value string) partitioned by (ds string, hr string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dummy
+PREHOOK: query: insert overwrite table dummy partition (ds='10',hr='11') select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dummy@ds=10/hr=11
+POSTHOOK: query: insert overwrite table dummy partition (ds='10',hr='11') select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dummy@ds=10/hr=11
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table dummy partition (ds='10',hr='12') select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dummy@ds=10/hr=12
+POSTHOOK: query: insert overwrite table dummy partition (ds='10',hr='12') select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dummy@ds=10/hr=12
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dummy PARTITION(ds=10,hr=12).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: describe formatted dummy partition (ds='10', hr='11')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@dummy
+POSTHOOK: query: describe formatted dummy partition (ds='10', hr='11')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@dummy
+# col_name data_type comment
+
+key string
+value string
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Partition Information
+Partition Value: [10, 11]
+Database: default
+Table: dummy
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: describe formatted dummy partition (ds='10', hr='12')
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@dummy
+POSTHOOK: query: describe formatted dummy partition (ds='10', hr='12')
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@dummy
+# col_name data_type comment
+
+key string
+value string
+
+# Partition Information
+# col_name data_type comment
+
+ds string
+hr string
+
+# Detailed Partition Information
+Partition Value: [10, 12]
+Database: default
+Table: dummy
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: drop table dummy
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dummy
+PREHOOK: Output: default@dummy
+POSTHOOK: query: drop table dummy
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dummy
+POSTHOOK: Output: default@dummy
+PREHOOK: query: -- dynamic partitioned table on insert
+
+create table dummy (key int) partitioned by (hr int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dummy
+POSTHOOK: query: -- dynamic partitioned table on insert
+
+create table dummy (key int) partitioned by (hr int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dummy
+PREHOOK: query: CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl
+POSTHOOK: query: CREATE TABLE tbl(key int, value int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tbl.txt' OVERWRITE INTO TABLE tbl
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@tbl
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tbl.txt' OVERWRITE INTO TABLE tbl
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@tbl
+PREHOOK: query: insert overwrite table dummy partition (hr) select * from tbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl
+PREHOOK: Output: default@dummy
+POSTHOOK: query: insert overwrite table dummy partition (hr) select * from tbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl
+POSTHOOK: Output: default@dummy@hr=1994
+POSTHOOK: Output: default@dummy@hr=1996
+POSTHOOK: Output: default@dummy@hr=1997
+POSTHOOK: Output: default@dummy@hr=1998
+POSTHOOK: Lineage: dummy PARTITION(hr=1994).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1996).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1997).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: dummy PARTITION(hr=1998).key SIMPLE [(tbl)tbl.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: describe formatted dummy partition (hr=1997)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@dummy
+POSTHOOK: query: describe formatted dummy partition (hr=1997)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@dummy
+# col_name data_type comment
+
+key int
+
+# Partition Information
+# col_name data_type comment
+
+hr int
+
+# Detailed Partition Information
+Partition Value: [1997]
+Database: default
+Table: dummy
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 0
+ rawDataSize 0
+ totalSize 12
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: describe formatted dummy partition (hr=1994)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@dummy
+POSTHOOK: query: describe formatted dummy partition (hr=1994)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@dummy
+# col_name data_type comment
+
+key int
+
+# Partition Information
+# col_name data_type comment
+
+hr int
+
+# Detailed Partition Information
+Partition Value: [1994]
+Database: default
+Table: dummy
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 0
+ rawDataSize 0
+ totalSize 2
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: describe formatted dummy partition (hr=1998)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@dummy
+POSTHOOK: query: describe formatted dummy partition (hr=1998)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@dummy
+# col_name data_type comment
+
+key int
+
+# Partition Information
+# col_name data_type comment
+
+hr int
+
+# Detailed Partition Information
+Partition Value: [1998]
+Database: default
+Table: dummy
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 0
+ rawDataSize 0
+ totalSize 4
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: describe formatted dummy partition (hr=1996)
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@dummy
+POSTHOOK: query: describe formatted dummy partition (hr=1996)
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@dummy
+# col_name data_type comment
+
+key int
+
+# Partition Information
+# col_name data_type comment
+
+hr int
+
+# Detailed Partition Information
+Partition Value: [1996]
+Database: default
+Table: dummy
+#### A masked pattern was here ####
+Protect Mode: None
+#### A masked pattern was here ####
+Partition Parameters:
+ COLUMN_STATS_ACCURATE true
+ numFiles 1
+ numRows 0
+ rawDataSize 0
+ totalSize 2
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: drop table tbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl
+PREHOOK: Output: default@tbl
+POSTHOOK: query: drop table tbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl
+POSTHOOK: Output: default@tbl
+PREHOOK: query: drop table dummy
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dummy
+PREHOOK: Output: default@dummy
+POSTHOOK: query: drop table dummy
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dummy
+POSTHOOK: Output: default@dummy