You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2016/05/12 17:20:58 UTC
hive git commit: HIVE-13621: compute stats in certain cases fails with NPE (Vikram Dixit K, Pengcheng Xiong, reviewed by Gunther Hagleitner)
Repository: hive
Updated Branches:
refs/heads/master 64c96e1e9 -> 4156c5da5
HIVE-13621: compute stats in certain cases fails with NPE (Vikram Dixit K, Pengcheng Xiong, reviewed by Gunther Hagleitner)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4156c5da
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4156c5da
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4156c5da
Branch: refs/heads/master
Commit: 4156c5da5099e3fa9b220229fe99ef0d609cd7ac
Parents: 64c96e1
Author: Pengcheng Xiong <px...@apache.org>
Authored: Thu May 12 10:17:21 2016 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Thu May 12 10:17:21 2016 -0700
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../hadoop/hive/metastore/hbase/HBaseUtils.java | 27 +--
.../apache/hadoop/hive/ql/exec/Operator.java | 2 +-
.../stats/annotation/StatsRulesProcFactory.java | 1 +
.../test/queries/clientpositive/deleteAnalyze.q | 31 ++++
.../results/clientpositive/deleteAnalyze.q.out | 173 +++++++++++++++++++
.../clientpositive/tez/deleteAnalyze.q.out | 140 +++++++++++++++
7 files changed, 363 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/4156c5da/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 5aedd10..c891d40 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -70,6 +70,7 @@ disabled.query.files=ql_rewrite_gbtoidx.q,\
smb_mapjoin_8.q
minitez.query.files.shared=acid_globallimit.q,\
+ deleteAnalyze.q,\
empty_join.q,\
alter_merge_2_orc.q,\
alter_merge_orc.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/4156c5da/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java
index e0b449b..d1cff06 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseUtils.java
@@ -1205,17 +1205,22 @@ public class HBaseUtils {
if (decimalData.isSetBitVectors()) {
builder.setBitVectors(decimalData.getBitVectors());
}
- builder.setDecimalStats(
- HbaseMetastoreProto.ColumnStats.DecimalStats
- .newBuilder()
- .setLowValue(
- HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder()
- .setUnscaled(ByteString.copyFrom(decimalData.getLowValue().getUnscaled()))
- .setScale(decimalData.getLowValue().getScale()).build())
- .setHighValue(
- HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder()
- .setUnscaled(ByteString.copyFrom(decimalData.getHighValue().getUnscaled()))
- .setScale(decimalData.getHighValue().getScale()).build())).build();
+ if (decimalData.getLowValue() != null && decimalData.getHighValue() != null) {
+ builder.setDecimalStats(
+ HbaseMetastoreProto.ColumnStats.DecimalStats
+ .newBuilder()
+ .setLowValue(
+ HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder()
+ .setUnscaled(ByteString.copyFrom(decimalData.getLowValue().getUnscaled()))
+ .setScale(decimalData.getLowValue().getScale()).build())
+ .setHighValue(
+ HbaseMetastoreProto.ColumnStats.DecimalStats.Decimal.newBuilder()
+ .setUnscaled(ByteString.copyFrom(decimalData.getHighValue().getUnscaled()))
+ .setScale(decimalData.getHighValue().getScale()).build())).build();
+ } else {
+ builder.setDecimalStats(HbaseMetastoreProto.ColumnStats.DecimalStats.newBuilder().clear()
+ .build());
+ }
break;
default:
http://git-wip-us.apache.org/repos/asf/hive/blob/4156c5da/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
index f330564..636f079 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
@@ -412,7 +412,7 @@ public abstract class Operator<T extends OperatorDesc> implements Serializable,C
}
/**
- * This metod can be used to retrieve the results from async operations
+ * This method can be used to retrieve the results from async operations
* started at init time - before the operator pipeline is started.
*
* @param os
http://git-wip-us.apache.org/repos/asf/hive/blob/4156c5da/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 320dc10..3944e10 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -1792,6 +1792,7 @@ public class StatsRulesProcFactory {
}
}
+ denom = denom == 0 ? 1 : denom;
factor = (double) max / (double) denom;
for (int i = 0; i < rowCountParents.size(); i++) {
http://git-wip-us.apache.org/repos/asf/hive/blob/4156c5da/ql/src/test/queries/clientpositive/deleteAnalyze.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/deleteAnalyze.q b/ql/src/test/queries/clientpositive/deleteAnalyze.q
new file mode 100644
index 0000000..7e5371c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/deleteAnalyze.q
@@ -0,0 +1,31 @@
+set hive.stats.autogather=true;
+
+dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/testdeci2;
+
+create table testdeci2(
+id int,
+amount decimal(10,3),
+sales_tax decimal(10,3),
+item string)
+stored as orc location '${system:test.tmp.dir}/testdeci2';
+
+insert into table testdeci2 values(1,12.123,12345.123,'desk1'),(2,123.123,1234.123,'desk2');
+
+describe formatted testdeci2;
+
+dfs -rmr ${system:test.tmp.dir}/testdeci2/000000_0;
+
+describe formatted testdeci2 amount;
+
+analyze table testdeci2 compute statistics for columns;
+
+set hive.stats.fetch.column.stats=true;
+
+analyze table testdeci2 compute statistics for columns;
+
+explain
+select s.id,
+coalesce(d.amount,0) as sales,
+coalesce(d.sales_tax,0) as tax
+from testdeci2 s join testdeci2 d
+on s.item=d.item and d.id=2;
http://git-wip-us.apache.org/repos/asf/hive/blob/4156c5da/ql/src/test/results/clientpositive/deleteAnalyze.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/deleteAnalyze.q.out b/ql/src/test/results/clientpositive/deleteAnalyze.q.out
new file mode 100644
index 0000000..7b9391b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/deleteAnalyze.q.out
@@ -0,0 +1,173 @@
+PREHOOK: query: create table testdeci2(
+id int,
+amount decimal(10,3),
+sales_tax decimal(10,3),
+item string)
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@testdeci2
+POSTHOOK: query: create table testdeci2(
+id int,
+amount decimal(10,3),
+sales_tax decimal(10,3),
+item string)
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@testdeci2
+PREHOOK: query: insert into table testdeci2 values(1,12.123,12345.123,'desk1'),(2,123.123,1234.123,'desk2')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@testdeci2
+POSTHOOK: query: insert into table testdeci2 values(1,12.123,12345.123,'desk1'),(2,123.123,1234.123,'desk2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@testdeci2
+POSTHOOK: Lineage: testdeci2.amount EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: testdeci2.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: testdeci2.item SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: testdeci2.sales_tax EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: describe formatted testdeci2
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@testdeci2
+POSTHOOK: query: describe formatted testdeci2
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@testdeci2
+# col_name data_type comment
+
+id int
+amount decimal(10,3)
+sales_tax decimal(10,3)
+item string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ numFiles 1
+ totalSize 578
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted testdeci2 amount
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@testdeci2
+POSTHOOK: query: describe formatted testdeci2 amount
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@testdeci2
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+amount decimal(10,3) from deserializer
+PREHOOK: query: analyze table testdeci2 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testdeci2
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table testdeci2 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testdeci2
+#### A masked pattern was here ####
+PREHOOK: query: analyze table testdeci2 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testdeci2
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table testdeci2 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testdeci2
+#### A masked pattern was here ####
+PREHOOK: query: explain
+select s.id,
+coalesce(d.amount,0) as sales,
+coalesce(d.sales_tax,0) as tax
+from testdeci2 s join testdeci2 d
+on s.item=d.item and d.id=2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select s.id,
+coalesce(d.amount,0) as sales,
+coalesce(d.sales_tax,0) as tax
+from testdeci2 s join testdeci2 d
+on s.item=d.item and d.id=2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 5 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: item is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: id (type: int), item (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 5 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
+ TableScan
+ alias: s
+ Statistics: Num rows: 1 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((id = 2) and item is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: amount (type: decimal(10,3)), sales_tax (type: decimal(10,3)), item (type: string)
+ outputColumnNames: _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col3 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: string)
+ Statistics: Num rows: 1 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: decimal(10,3)), _col2 (type: decimal(10,3))
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col3 (type: string)
+ outputColumnNames: _col0, _col3, _col4
+ Statistics: Num rows: 5 Data size: 1140 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), COALESCE(_col3,0) (type: decimal(13,3)), COALESCE(_col4,0) (type: decimal(13,3))
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 5 Data size: 1140 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 1140 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
http://git-wip-us.apache.org/repos/asf/hive/blob/4156c5da/ql/src/test/results/clientpositive/tez/deleteAnalyze.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/deleteAnalyze.q.out b/ql/src/test/results/clientpositive/tez/deleteAnalyze.q.out
new file mode 100644
index 0000000..47f2a20
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/deleteAnalyze.q.out
@@ -0,0 +1,140 @@
+PREHOOK: query: create table testdeci2(
+id int,
+amount decimal(10,3),
+sales_tax decimal(10,3),
+item string)
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@testdeci2
+POSTHOOK: query: create table testdeci2(
+id int,
+amount decimal(10,3),
+sales_tax decimal(10,3),
+item string)
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@testdeci2
+PREHOOK: query: insert into table testdeci2 values(1,12.123,12345.123,'desk1'),(2,123.123,1234.123,'desk2')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@testdeci2
+POSTHOOK: query: insert into table testdeci2 values(1,12.123,12345.123,'desk1'),(2,123.123,1234.123,'desk2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@testdeci2
+POSTHOOK: Lineage: testdeci2.amount EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: testdeci2.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: testdeci2.item SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: testdeci2.sales_tax EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: describe formatted testdeci2
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@testdeci2
+POSTHOOK: query: describe formatted testdeci2
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@testdeci2
+# col_name data_type comment
+
+id int
+amount decimal(10,3)
+sales_tax decimal(10,3)
+item string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ numFiles 1
+ totalSize 578
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted testdeci2 amount
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@testdeci2
+POSTHOOK: query: describe formatted testdeci2 amount
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@testdeci2
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+amount decimal(10,3) from deserializer
+PREHOOK: query: analyze table testdeci2 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testdeci2
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table testdeci2 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testdeci2
+#### A masked pattern was here ####
+PREHOOK: query: analyze table testdeci2 compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testdeci2
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table testdeci2 compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testdeci2
+#### A masked pattern was here ####
+PREHOOK: query: explain
+select s.id,
+coalesce(d.amount,0) as sales,
+coalesce(d.sales_tax,0) as tax
+from testdeci2 s join testdeci2 d
+on s.item=d.item and d.id=2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select s.id,
+coalesce(d.amount,0) as sales,
+coalesce(d.sales_tax,0) as tax
+from testdeci2 s join testdeci2 d
+on s.item=d.item and d.id=2
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:-1
+ Stage-1
+ Reducer 2
+ File Output Operator [FS_10]
+ Select Operator [SEL_9] (rows=5 width=228)
+ Output:["_col0","_col1","_col2"]
+ Merge Join Operator [MERGEJOIN_15] (rows=5 width=228)
+ Conds:RS_6._col1=RS_7._col3(Inner),Output:["_col0","_col3","_col4"]
+ <-Map 1 [SIMPLE_EDGE]
+ SHUFFLE [RS_6]
+ PartitionCols:_col1
+ Select Operator [SEL_2] (rows=5 width=88)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_13] (rows=5 width=88)
+ predicate:item is not null
+ TableScan [TS_0] (rows=5 width=88)
+ default@testdeci2,s,Tbl:COMPLETE,Col:COMPLETE,Output:["id","item"]
+ <-Map 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_7]
+ PartitionCols:_col3
+ Select Operator [SEL_5] (rows=1 width=312)
+ Output:["_col1","_col2","_col3"]
+ Filter Operator [FIL_14] (rows=1 width=312)
+ predicate:((id = 2) and item is not null)
+ TableScan [TS_3] (rows=1 width=312)
+ default@testdeci2,s,Tbl:COMPLETE,Col:COMPLETE,Output:["id","amount","sales_tax","item"]
+