You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2018/05/23 16:57:25 UTC
[2/4] hive git commit: Revert "HIVE-19557: stats: filters for dates are not taking advantage of min/max values (Zoltan Haindrich reviewed by Ashutosh Chauhan)"
Revert "HIVE-19557: stats: filters for dates are not taking advantage of min/max values (Zoltan Haindrich reviewed by Ashutosh Chauhan)"
This reverts commit 0b2d364aa5977c7a15a4b5082cf9ef431dcc394e.
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7056445c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7056445c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7056445c
Branch: refs/heads/master
Commit: 7056445c1e9a1b040ae44dca99b351296325b047
Parents: 29f5784
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Wed May 23 09:51:40 2018 -0700
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Wed May 23 09:51:40 2018 -0700
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 -
.../stats/annotation/StatsRulesProcFactory.java | 10 +-
.../clientpositive/colstats_date_min_max.q | 30 ---
.../llap/colstats_date_min_max.q.out | 193 -------------------
.../clientpositive/llap/vector_between_in.q.out | 20 +-
5 files changed, 11 insertions(+), 243 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/7056445c/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 6007d5a..6528ec6 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -488,7 +488,6 @@ minillaplocal.query.files=\
cbo_subq_not_in.q,\
column_table_stats.q,\
column_table_stats_orc.q,\
- colstats_date_min_max.q,\
compare_double_bigint_2.q,\
constprog_dpp.q,\
current_date_timestamp.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/7056445c/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 91cccfb..c770227 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -100,8 +100,6 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -753,14 +751,8 @@ public class StatsRulesProcFactory {
}
} else if (colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME) ||
colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
- int value;
- if (colTypeLowerCase == serdeConstants.DATE_TYPE_NAME) {
- DateWritable writableVal = new DateWritable(java.sql.Date.valueOf(boundValue));
- value = writableVal.getDays();
- } else {
- value = new Integer(boundValue);
- }
// Date is an integer internally
+ int value = new Integer(boundValue);
int maxValue = cs.getRange().maxValue.intValue();
int minValue = cs.getRange().minValue.intValue();
if (upperBound) {
http://git-wip-us.apache.org/repos/asf/hive/blob/7056445c/ql/src/test/queries/clientpositive/colstats_date_min_max.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/colstats_date_min_max.q b/ql/src/test/queries/clientpositive/colstats_date_min_max.q
deleted file mode 100644
index 7f5be6a..0000000
--- a/ql/src/test/queries/clientpositive/colstats_date_min_max.q
+++ /dev/null
@@ -1,30 +0,0 @@
-set hive.explain.user=true;
-
-create table d1(d date);
--- tblproperties('transactional'='false');
-
-insert into d1 values
- ('2010-10-01'),
- ('2010-10-02'),
- ('2010-10-03'),
- ('2010-10-04'),
- ('2010-10-05'),
- ('2010-10-06'),
- ('2010-10-07'),
- ('2010-10-08'),
- ('2010-10-09'),
- ('2010-10-10');
-
-analyze table d1 compute statistics for columns;
-
-desc formatted d1;
-desc formatted d1 d;
-
-explain
-select 'stats: FIL ~0 read',count(1) from d1 where d < '2010-03-01';
-
-explain
-select 'stats: FIL estimate some read',count(1) from d1 where d < '2010-10-03';
-
-explain
-select 'stats: FIL estimate all read',count(1) from d1 where d < '2010-11-03';
http://git-wip-us.apache.org/repos/asf/hive/blob/7056445c/ql/src/test/results/clientpositive/llap/colstats_date_min_max.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/colstats_date_min_max.q.out b/ql/src/test/results/clientpositive/llap/colstats_date_min_max.q.out
deleted file mode 100644
index 7754f3e..0000000
--- a/ql/src/test/results/clientpositive/llap/colstats_date_min_max.q.out
+++ /dev/null
@@ -1,193 +0,0 @@
-PREHOOK: query: create table d1(d date)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@d1
-POSTHOOK: query: create table d1(d date)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@d1
-PREHOOK: query: insert into d1 values
- ('2010-10-01'),
- ('2010-10-02'),
- ('2010-10-03'),
- ('2010-10-04'),
- ('2010-10-05'),
- ('2010-10-06'),
- ('2010-10-07'),
- ('2010-10-08'),
- ('2010-10-09'),
- ('2010-10-10')
-PREHOOK: type: QUERY
-PREHOOK: Input: _dummy_database@_dummy_table
-PREHOOK: Output: default@d1
-POSTHOOK: query: insert into d1 values
- ('2010-10-01'),
- ('2010-10-02'),
- ('2010-10-03'),
- ('2010-10-04'),
- ('2010-10-05'),
- ('2010-10-06'),
- ('2010-10-07'),
- ('2010-10-08'),
- ('2010-10-09'),
- ('2010-10-10')
-POSTHOOK: type: QUERY
-POSTHOOK: Input: _dummy_database@_dummy_table
-POSTHOOK: Output: default@d1
-POSTHOOK: Lineage: d1.d SCRIPT []
-PREHOOK: query: analyze table d1 compute statistics for columns
-PREHOOK: type: ANALYZE_TABLE
-PREHOOK: Input: default@d1
-PREHOOK: Output: default@d1
-#### A masked pattern was here ####
-POSTHOOK: query: analyze table d1 compute statistics for columns
-POSTHOOK: type: ANALYZE_TABLE
-POSTHOOK: Input: default@d1
-POSTHOOK: Output: default@d1
-#### A masked pattern was here ####
-PREHOOK: query: desc formatted d1
-PREHOOK: type: DESCTABLE
-PREHOOK: Input: default@d1
-POSTHOOK: query: desc formatted d1
-POSTHOOK: type: DESCTABLE
-POSTHOOK: Input: default@d1
-# col_name data_type comment
-d date
-
-# Detailed Table Information
-Database: default
-#### A masked pattern was here ####
-Retention: 0
-#### A masked pattern was here ####
-Table Type: MANAGED_TABLE
-Table Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"d\":\"true\"}}
- bucketing_version 2
- numFiles 1
- numRows 10
- rawDataSize 100
- totalSize 110
-#### A masked pattern was here ####
-
-# Storage Information
-SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-InputFormat: org.apache.hadoop.mapred.TextInputFormat
-OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-Compressed: No
-Num Buckets: -1
-Bucket Columns: []
-Sort Columns: []
-Storage Desc Params:
- serialization.format 1
-PREHOOK: query: desc formatted d1 d
-PREHOOK: type: DESCTABLE
-PREHOOK: Input: default@d1
-POSTHOOK: query: desc formatted d1 d
-POSTHOOK: type: DESCTABLE
-POSTHOOK: Input: default@d1
-col_name d
-data_type date
-min 2010-10-01
-max 2010-10-10
-num_nulls 0
-distinct_count 10
-avg_col_len
-max_col_len
-num_trues
-num_falses
-bitVector HL
-comment from deserializer
-COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"d\":\"true\"}}
-PREHOOK: query: explain
-select 'stats: FIL ~0 read',count(1) from d1 where d < '2010-03-01'
-PREHOOK: type: QUERY
-POSTHOOK: query: explain
-select 'stats: FIL ~0 read',count(1) from d1 where d < '2010-03-01'
-POSTHOOK: type: QUERY
-Plan optimized by CBO.
-
-Vertex dependency in root stage
-Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
-
-Stage-0
- Fetch Operator
- limit:-1
- Stage-1
- Reducer 2 vectorized, llap
- File Output Operator [FS_15]
- Select Operator [SEL_14] (rows=1 width=110)
- Output:["_col0","_col1"]
- Group By Operator [GBY_13] (rows=1 width=8)
- Output:["_col0"],aggregations:["count(VALUE._col0)"]
- <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap
- PARTITION_ONLY_SHUFFLE [RS_12]
- Group By Operator [GBY_11] (rows=1 width=8)
- Output:["_col0"],aggregations:["count()"]
- Select Operator [SEL_10] (rows=1 width=56)
- Filter Operator [FIL_9] (rows=1 width=56)
- predicate:(d < DATE'2010-03-01')
- TableScan [TS_0] (rows=10 width=56)
- default@d1,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d"]
-
-PREHOOK: query: explain
-select 'stats: FIL estimate some read',count(1) from d1 where d < '2010-10-03'
-PREHOOK: type: QUERY
-POSTHOOK: query: explain
-select 'stats: FIL estimate some read',count(1) from d1 where d < '2010-10-03'
-POSTHOOK: type: QUERY
-Plan optimized by CBO.
-
-Vertex dependency in root stage
-Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
-
-Stage-0
- Fetch Operator
- limit:-1
- Stage-1
- Reducer 2 vectorized, llap
- File Output Operator [FS_15]
- Select Operator [SEL_14] (rows=1 width=121)
- Output:["_col0","_col1"]
- Group By Operator [GBY_13] (rows=1 width=8)
- Output:["_col0"],aggregations:["count(VALUE._col0)"]
- <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap
- PARTITION_ONLY_SHUFFLE [RS_12]
- Group By Operator [GBY_11] (rows=1 width=8)
- Output:["_col0"],aggregations:["count()"]
- Select Operator [SEL_10] (rows=3 width=56)
- Filter Operator [FIL_9] (rows=3 width=56)
- predicate:(d < DATE'2010-10-03')
- TableScan [TS_0] (rows=10 width=56)
- default@d1,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d"]
-
-PREHOOK: query: explain
-select 'stats: FIL estimate all read',count(1) from d1 where d < '2010-11-03'
-PREHOOK: type: QUERY
-POSTHOOK: query: explain
-select 'stats: FIL estimate all read',count(1) from d1 where d < '2010-11-03'
-POSTHOOK: type: QUERY
-Plan optimized by CBO.
-
-Vertex dependency in root stage
-Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
-
-Stage-0
- Fetch Operator
- limit:-1
- Stage-1
- Reducer 2 vectorized, llap
- File Output Operator [FS_15]
- Select Operator [SEL_14] (rows=1 width=120)
- Output:["_col0","_col1"]
- Group By Operator [GBY_13] (rows=1 width=8)
- Output:["_col0"],aggregations:["count(VALUE._col0)"]
- <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap
- PARTITION_ONLY_SHUFFLE [RS_12]
- Group By Operator [GBY_11] (rows=1 width=8)
- Output:["_col0"],aggregations:["count()"]
- Select Operator [SEL_10] (rows=10 width=56)
- Filter Operator [FIL_9] (rows=10 width=56)
- predicate:(d < DATE'2010-11-03')
- TableScan [TS_0] (rows=10 width=56)
- default@d1,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d"]
-
http://git-wip-us.apache.org/repos/asf/hive/blob/7056445c/ql/src/test/results/clientpositive/llap/vector_between_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_between_in.q.out b/ql/src/test/results/clientpositive/llap/vector_between_in.q.out
index f76053e..6093beb 100644
--- a/ql/src/test/results/clientpositive/llap/vector_between_in.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_between_in.q.out
@@ -465,7 +465,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterLongColumnBetween(col 3:date, left -2, right 1)
predicate: cdate BETWEEN DATE'1969-12-30' AND DATE'1970-01-02' (type: boolean)
- Statistics: Num rows: 4096 Data size: 217934 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 72627 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cdate (type: date)
outputColumnNames: _col0
@@ -473,7 +473,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [3]
- Statistics: Num rows: 4096 Data size: 217934 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 72627 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: date)
sort order: +
@@ -481,7 +481,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 4096 Data size: 217934 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 72627 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -509,13 +509,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 4096 Data size: 217934 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 72627 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 4096 Data size: 217934 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 72627 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -560,7 +560,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterLongColumnNotBetween(col 3:date, left -610, right 608)
predicate: cdate NOT BETWEEN DATE'1968-05-01' AND DATE'1971-09-01' (type: boolean)
- Statistics: Num rows: 8193 Data size: 435921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10924 Data size: 581228 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cdate (type: date)
outputColumnNames: _col0
@@ -568,7 +568,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [3]
- Statistics: Num rows: 8193 Data size: 435921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10924 Data size: 581228 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: date)
sort order: +
@@ -576,7 +576,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 8193 Data size: 435921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10924 Data size: 581228 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -604,13 +604,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 8193 Data size: 435921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10924 Data size: 581228 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 8193 Data size: 435921 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10924 Data size: 581228 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat