You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2016/04/05 21:13:29 UTC
hive git commit: HIVE-13200: Aggregation functions returning empty
rows on partitioned columns (Yongzhi Chen, reviewed by Sergey Shelukhin)
Repository: hive
Updated Branches:
refs/heads/branch-2.0 88e2237d3 -> 4945fd0bd
HIVE-13200: Aggregation functions returning empty rows on partitioned columns (Yongzhi Chen, reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4945fd0b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4945fd0b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4945fd0b
Branch: refs/heads/branch-2.0
Commit: 4945fd0bd49e14e05581940e56b940fdc30121f9
Parents: 88e2237
Author: Yongzhi Chen <yc...@apache.org>
Authored: Thu Mar 3 11:55:37 2016 -0500
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Tue Apr 5 12:13:22 2016 -0700
----------------------------------------------------------------------
.../physical/MetadataOnlyOptimizer.java | 3 +-
.../hadoop/hive/ql/plan/TableScanDesc.java | 16 ++
.../test/queries/clientpositive/skiphf_aggr.q | 42 +++
.../results/clientpositive/skiphf_aggr.q.out | 267 +++++++++++++++++++
4 files changed, 327 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/4945fd0b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java
index d47d3c2..5758282 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java
@@ -119,7 +119,8 @@ public class MetadataOnlyOptimizer implements PhysicalPlanResolver {
boolean noColNeeded = (colIDs == null) || (colIDs.isEmpty());
boolean noVCneeded = (desc == null) || (desc.getVirtualCols() == null)
|| (desc.getVirtualCols().isEmpty());
- if (noColNeeded && noVCneeded) {
+ boolean isSkipHF = desc.isNeedSkipHeaderFooters();
+ if (noColNeeded && noVCneeded && !isSkipHF) {
walkerCtx.setMayBeMetadataOnly(tsOp);
}
return nd;
http://git-wip-us.apache.org/repos/asf/hive/blob/4945fd0b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
index 098aa89..8bf82de 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.TableSample;
import org.apache.hadoop.hive.ql.plan.Explain.Level;
+import org.apache.hadoop.hive.serde.serdeConstants;
/**
@@ -344,4 +345,19 @@ public class TableScanDesc extends AbstractOperatorDesc {
public void setNumBuckets(int numBuckets) {
this.numBuckets = numBuckets;
}
+
+ /** Tells whether the table's HEADER_COUNT or FOOTER_COUNT parameter is set to a non-zero value. */
+ public boolean isNeedSkipHeaderFooters() {
+ Map<String, String> tblProps = (tableMetadata == null || tableMetadata.getTTable() == null)
+ ? null : tableMetadata.getTTable().getParameters();
+ if (tblProps == null) {
+ return false;
+ }
+ // An absent parameter is treated as a count of zero.
+ String headerVal = tblProps.get(serdeConstants.HEADER_COUNT);
+ int headerLines = (headerVal == null) ? 0 : Integer.parseInt(headerVal);
+ String footerVal = tblProps.get(serdeConstants.FOOTER_COUNT);
+ int footerLines = (footerVal == null) ? 0 : Integer.parseInt(footerVal);
+ return headerLines != 0 || footerLines != 0;
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/4945fd0b/ql/src/test/queries/clientpositive/skiphf_aggr.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/skiphf_aggr.q b/ql/src/test/queries/clientpositive/skiphf_aggr.q
new file mode 100644
index 0000000..fcd0b35
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/skiphf_aggr.q
@@ -0,0 +1,42 @@
+DROP TABLE IF EXISTS skipHTbl;
+
+CREATE TABLE skipHTbl (a int)
+PARTITIONED BY (b int)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+TBLPROPERTIES('skip.header.line.count'='1');
+
+INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1), (2), (3), (4);
+INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4);
+
+SELECT * FROM skipHTbl;
+
+SELECT DISTINCT b FROM skipHTbl;
+SELECT MAX(b) FROM skipHTbl;
+SELECT DISTINCT a FROM skipHTbl;
+
+INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1);
+INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4);
+
+SELECT DISTINCT b FROM skipHTbl;
+SELECT MIN(b) FROM skipHTbl;
+SELECT DISTINCT a FROM skipHTbl;
+
+DROP TABLE IF EXISTS skipFTbl;
+
+CREATE TABLE skipFTbl (a int)
+PARTITIONED BY (b int)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+TBLPROPERTIES('skip.footer.line.count'='1');
+
+INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 1) VALUES (1), (2), (3), (4);
+INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 2) VALUES (1), (2), (3), (4);
+
+SELECT * FROM skipFTbl;
+
+SELECT DISTINCT b FROM skipFTbl;
+SELECT MAX(b) FROM skipFTbl;
+SELECT DISTINCT a FROM skipFTbl;
+
+DROP TABLE skipHTbl;
+DROP TABLE skipFTbl;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/4945fd0b/ql/src/test/results/clientpositive/skiphf_aggr.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/skiphf_aggr.q.out b/ql/src/test/results/clientpositive/skiphf_aggr.q.out
new file mode 100644
index 0000000..aeb4b1b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/skiphf_aggr.q.out
@@ -0,0 +1,267 @@
+PREHOOK: query: DROP TABLE IF EXISTS skipHTbl
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS skipHTbl
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE skipHTbl (a int)
+PARTITIONED BY (b int)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+TBLPROPERTIES('skip.header.line.count'='1')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@skipHTbl
+POSTHOOK: query: CREATE TABLE skipHTbl (a int)
+PARTITIONED BY (b int)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+TBLPROPERTIES('skip.header.line.count'='1')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@skipHTbl
+PREHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1), (2), (3), (4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@skiphtbl@b=1
+POSTHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1), (2), (3), (4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@skiphtbl@b=1
+POSTHOOK: Lineage: skiphtbl PARTITION(b=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@skiphtbl@b=2
+POSTHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@skiphtbl@b=2
+POSTHOOK: Lineage: skiphtbl PARTITION(b=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: SELECT * FROM skipHTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Input: default@skiphtbl@b=1
+PREHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM skipHTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Input: default@skiphtbl@b=1
+POSTHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+2 1
+3 1
+4 1
+2 2
+3 2
+4 2
+PREHOOK: query: SELECT DISTINCT b FROM skipHTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Input: default@skiphtbl@b=1
+PREHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT DISTINCT b FROM skipHTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Input: default@skiphtbl@b=1
+POSTHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+1
+2
+PREHOOK: query: SELECT MAX(b) FROM skipHTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Input: default@skiphtbl@b=1
+PREHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MAX(b) FROM skipHTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Input: default@skiphtbl@b=1
+POSTHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+2
+PREHOOK: query: SELECT DISTINCT a FROM skipHTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Input: default@skiphtbl@b=1
+PREHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT DISTINCT a FROM skipHTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Input: default@skiphtbl@b=1
+POSTHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+2
+3
+4
+PREHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__3
+PREHOOK: Output: default@skiphtbl@b=1
+POSTHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__3
+POSTHOOK: Output: default@skiphtbl@b=1
+POSTHOOK: Lineage: skiphtbl PARTITION(b=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__4
+PREHOOK: Output: default@skiphtbl@b=2
+POSTHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__4
+POSTHOOK: Output: default@skiphtbl@b=2
+POSTHOOK: Lineage: skiphtbl PARTITION(b=2).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: SELECT DISTINCT b FROM skipHTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Input: default@skiphtbl@b=1
+PREHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT DISTINCT b FROM skipHTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Input: default@skiphtbl@b=1
+POSTHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+2
+PREHOOK: query: SELECT MIN(b) FROM skipHTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Input: default@skiphtbl@b=1
+PREHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MIN(b) FROM skipHTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Input: default@skiphtbl@b=1
+POSTHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+2
+PREHOOK: query: SELECT DISTINCT a FROM skipHTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Input: default@skiphtbl@b=1
+PREHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT DISTINCT a FROM skipHTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Input: default@skiphtbl@b=1
+POSTHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+2
+3
+4
+PREHOOK: query: DROP TABLE IF EXISTS skipFTbl
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS skipFTbl
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE skipFTbl (a int)
+PARTITIONED BY (b int)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+TBLPROPERTIES('skip.footer.line.count'='1')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@skipFTbl
+POSTHOOK: query: CREATE TABLE skipFTbl (a int)
+PARTITIONED BY (b int)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+TBLPROPERTIES('skip.footer.line.count'='1')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@skipFTbl
+PREHOOK: query: INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 1) VALUES (1), (2), (3), (4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__5
+PREHOOK: Output: default@skipftbl@b=1
+POSTHOOK: query: INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 1) VALUES (1), (2), (3), (4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__5
+POSTHOOK: Output: default@skipftbl@b=1
+POSTHOOK: Lineage: skipftbl PARTITION(b=1).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 2) VALUES (1), (2), (3), (4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__6
+PREHOOK: Output: default@skipftbl@b=2
+POSTHOOK: query: INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 2) VALUES (1), (2), (3), (4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__6
+POSTHOOK: Output: default@skipftbl@b=2
+POSTHOOK: Lineage: skipftbl PARTITION(b=2).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: SELECT * FROM skipFTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skipftbl
+PREHOOK: Input: default@skipftbl@b=1
+PREHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM skipFTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skipftbl
+POSTHOOK: Input: default@skipftbl@b=1
+POSTHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+1 1
+2 1
+3 1
+1 2
+2 2
+3 2
+PREHOOK: query: SELECT DISTINCT b FROM skipFTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skipftbl
+PREHOOK: Input: default@skipftbl@b=1
+PREHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT DISTINCT b FROM skipFTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skipftbl
+POSTHOOK: Input: default@skipftbl@b=1
+POSTHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+1
+2
+PREHOOK: query: SELECT MAX(b) FROM skipFTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skipftbl
+PREHOOK: Input: default@skipftbl@b=1
+PREHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MAX(b) FROM skipFTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skipftbl
+POSTHOOK: Input: default@skipftbl@b=1
+POSTHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+2
+PREHOOK: query: SELECT DISTINCT a FROM skipFTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skipftbl
+PREHOOK: Input: default@skipftbl@b=1
+PREHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT DISTINCT a FROM skipFTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skipftbl
+POSTHOOK: Input: default@skipftbl@b=1
+POSTHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+1
+2
+3
+PREHOOK: query: DROP TABLE skipHTbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Output: default@skiphtbl
+POSTHOOK: query: DROP TABLE skipHTbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Output: default@skiphtbl
+PREHOOK: query: DROP TABLE skipFTbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@skipftbl
+PREHOOK: Output: default@skipftbl
+POSTHOOK: query: DROP TABLE skipFTbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@skipftbl
+POSTHOOK: Output: default@skipftbl