You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2016/04/05 21:13:29 UTC

hive git commit: HIVE-13200: Aggregation functions returning empty rows on partitioned columns (Yongzhi Chen, reviewed by Sergey Shelukhin)

Repository: hive
Updated Branches:
  refs/heads/branch-2.0 88e2237d3 -> 4945fd0bd


HIVE-13200: Aggregation functions returning empty rows on partitioned columns (Yongzhi Chen, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4945fd0b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4945fd0b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4945fd0b

Branch: refs/heads/branch-2.0
Commit: 4945fd0bd49e14e05581940e56b940fdc30121f9
Parents: 88e2237
Author: Yongzhi Chen <yc...@apache.org>
Authored: Thu Mar 3 11:55:37 2016 -0500
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Tue Apr 5 12:13:22 2016 -0700

----------------------------------------------------------------------
 .../physical/MetadataOnlyOptimizer.java         |   3 +-
 .../hadoop/hive/ql/plan/TableScanDesc.java      |  16 ++
 .../test/queries/clientpositive/skiphf_aggr.q   |  42 +++
 .../results/clientpositive/skiphf_aggr.q.out    | 267 +++++++++++++++++++
 4 files changed, 327 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/4945fd0b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java
index d47d3c2..5758282 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java
@@ -119,7 +119,8 @@ public class MetadataOnlyOptimizer implements PhysicalPlanResolver {
       boolean noColNeeded = (colIDs == null) || (colIDs.isEmpty());
       boolean noVCneeded = (desc == null) || (desc.getVirtualCols() == null)
                              || (desc.getVirtualCols().isEmpty());
-      if (noColNeeded && noVCneeded) {
+      boolean isSkipHF = desc.isNeedSkipHeaderFooters();
+      if (noColNeeded && noVCneeded && !isSkipHF) {
         walkerCtx.setMayBeMetadataOnly(tsOp);
       }
       return nd;

http://git-wip-us.apache.org/repos/asf/hive/blob/4945fd0b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
index 098aa89..8bf82de 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.TableSample;
 import org.apache.hadoop.hive.ql.plan.Explain.Level;
+import org.apache.hadoop.hive.serde.serdeConstants;
 
 
 /**
@@ -344,4 +345,19 @@ public class TableScanDesc extends AbstractOperatorDesc {
   public void setNumBuckets(int numBuckets) {
     this.numBuckets = numBuckets;
   }
+
+  public boolean isNeedSkipHeaderFooters() { // reports whether the table's parameters carry a nonzero header or footer count
+    boolean rtn = false; // remains false when tableMetadata, its getTTable() result, or the parameter map is null
+    if (tableMetadata != null && tableMetadata.getTTable() != null) {
+      Map<String, String> params = tableMetadata.getTTable().getParameters();
+      if (params != null) {
+        String skipHVal = params.get(serdeConstants.HEADER_COUNT); // presumably the 'skip.header.line.count' table property - confirm against serdeConstants
+        int hcount = skipHVal == null? 0 : Integer.parseInt(skipHVal); // missing key counts as 0; NOTE(review): a non-numeric value makes parseInt throw NumberFormatException
+        String skipFVal = params.get(serdeConstants.FOOTER_COUNT); // presumably the 'skip.footer.line.count' table property - confirm against serdeConstants
+        int fcount = skipFVal == null? 0 : Integer.parseInt(skipFVal); // same handling as the header count above
+        rtn = (hcount != 0 || fcount !=0 ); // any nonzero value qualifies, negative values included
+      }
+    }
+    return rtn;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/4945fd0b/ql/src/test/queries/clientpositive/skiphf_aggr.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/skiphf_aggr.q b/ql/src/test/queries/clientpositive/skiphf_aggr.q
new file mode 100644
index 0000000..fcd0b35
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/skiphf_aggr.q
@@ -0,0 +1,42 @@
+DROP TABLE IF EXISTS skipHTbl;
+
+CREATE TABLE skipHTbl (a int) 
+PARTITIONED BY (b int) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' 
+TBLPROPERTIES('skip.header.line.count'='1');
+
+INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1), (2), (3), (4);
+INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4);
+
+SELECT * FROM skipHTbl;
+
+SELECT DISTINCT b FROM skipHTbl;
+SELECT MAX(b) FROM skipHTbl;
+SELECT DISTINCT a FROM skipHTbl;
+
+INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1);
+INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4);
+
+SELECT DISTINCT b FROM skipHTbl;
+SELECT MIN(b) FROM skipHTbl;
+SELECT DISTINCT a FROM skipHTbl;
+
+DROP TABLE IF EXISTS skipFTbl;
+
+CREATE TABLE skipFTbl (a int) 
+PARTITIONED BY (b int) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' 
+TBLPROPERTIES('skip.footer.line.count'='1');
+
+INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 1) VALUES (1), (2), (3), (4);
+INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 2) VALUES (1), (2), (3), (4);
+
+SELECT * FROM skipFTbl;
+
+SELECT DISTINCT b FROM skipFTbl;
+SELECT MAX(b) FROM skipFTbl;
+SELECT DISTINCT a FROM skipFTbl;
+
+DROP TABLE skipHTbl;
+DROP TABLE skipFTbl;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/4945fd0b/ql/src/test/results/clientpositive/skiphf_aggr.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/skiphf_aggr.q.out b/ql/src/test/results/clientpositive/skiphf_aggr.q.out
new file mode 100644
index 0000000..aeb4b1b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/skiphf_aggr.q.out
@@ -0,0 +1,267 @@
+PREHOOK: query: DROP TABLE IF EXISTS skipHTbl
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS skipHTbl
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE skipHTbl (a int) 
+PARTITIONED BY (b int) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' 
+TBLPROPERTIES('skip.header.line.count'='1')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@skipHTbl
+POSTHOOK: query: CREATE TABLE skipHTbl (a int) 
+PARTITIONED BY (b int) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' 
+TBLPROPERTIES('skip.header.line.count'='1')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@skipHTbl
+PREHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1), (2), (3), (4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@skiphtbl@b=1
+POSTHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1), (2), (3), (4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@skiphtbl@b=1
+POSTHOOK: Lineage: skiphtbl PARTITION(b=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@skiphtbl@b=2
+POSTHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@skiphtbl@b=2
+POSTHOOK: Lineage: skiphtbl PARTITION(b=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: SELECT * FROM skipHTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Input: default@skiphtbl@b=1
+PREHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM skipHTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Input: default@skiphtbl@b=1
+POSTHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+2	1
+3	1
+4	1
+2	2
+3	2
+4	2
+PREHOOK: query: SELECT DISTINCT b FROM skipHTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Input: default@skiphtbl@b=1
+PREHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT DISTINCT b FROM skipHTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Input: default@skiphtbl@b=1
+POSTHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+1
+2
+PREHOOK: query: SELECT MAX(b) FROM skipHTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Input: default@skiphtbl@b=1
+PREHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MAX(b) FROM skipHTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Input: default@skiphtbl@b=1
+POSTHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+2
+PREHOOK: query: SELECT DISTINCT a FROM skipHTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Input: default@skiphtbl@b=1
+PREHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT DISTINCT a FROM skipHTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Input: default@skiphtbl@b=1
+POSTHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+2
+3
+4
+PREHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__3
+PREHOOK: Output: default@skiphtbl@b=1
+POSTHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 1) VALUES (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__3
+POSTHOOK: Output: default@skiphtbl@b=1
+POSTHOOK: Lineage: skiphtbl PARTITION(b=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__4
+PREHOOK: Output: default@skiphtbl@b=2
+POSTHOOK: query: INSERT OVERWRITE TABLE skipHTbl PARTITION (b = 2) VALUES (1), (2), (3), (4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__4
+POSTHOOK: Output: default@skiphtbl@b=2
+POSTHOOK: Lineage: skiphtbl PARTITION(b=2).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: SELECT DISTINCT b FROM skipHTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Input: default@skiphtbl@b=1
+PREHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT DISTINCT b FROM skipHTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Input: default@skiphtbl@b=1
+POSTHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+2
+PREHOOK: query: SELECT MIN(b) FROM skipHTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Input: default@skiphtbl@b=1
+PREHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MIN(b) FROM skipHTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Input: default@skiphtbl@b=1
+POSTHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+2
+PREHOOK: query: SELECT DISTINCT a FROM skipHTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Input: default@skiphtbl@b=1
+PREHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT DISTINCT a FROM skipHTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Input: default@skiphtbl@b=1
+POSTHOOK: Input: default@skiphtbl@b=2
+#### A masked pattern was here ####
+2
+3
+4
+PREHOOK: query: DROP TABLE IF EXISTS skipFTbl
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS skipFTbl
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE skipFTbl (a int) 
+PARTITIONED BY (b int) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' 
+TBLPROPERTIES('skip.footer.line.count'='1')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@skipFTbl
+POSTHOOK: query: CREATE TABLE skipFTbl (a int) 
+PARTITIONED BY (b int) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' 
+TBLPROPERTIES('skip.footer.line.count'='1')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@skipFTbl
+PREHOOK: query: INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 1) VALUES (1), (2), (3), (4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__5
+PREHOOK: Output: default@skipftbl@b=1
+POSTHOOK: query: INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 1) VALUES (1), (2), (3), (4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__5
+POSTHOOK: Output: default@skipftbl@b=1
+POSTHOOK: Lineage: skipftbl PARTITION(b=1).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 2) VALUES (1), (2), (3), (4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__6
+PREHOOK: Output: default@skipftbl@b=2
+POSTHOOK: query: INSERT OVERWRITE TABLE skipFTbl PARTITION (b = 2) VALUES (1), (2), (3), (4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__6
+POSTHOOK: Output: default@skipftbl@b=2
+POSTHOOK: Lineage: skipftbl PARTITION(b=2).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: SELECT * FROM skipFTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skipftbl
+PREHOOK: Input: default@skipftbl@b=1
+PREHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM skipFTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skipftbl
+POSTHOOK: Input: default@skipftbl@b=1
+POSTHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+1	1
+2	1
+3	1
+1	2
+2	2
+3	2
+PREHOOK: query: SELECT DISTINCT b FROM skipFTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skipftbl
+PREHOOK: Input: default@skipftbl@b=1
+PREHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT DISTINCT b FROM skipFTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skipftbl
+POSTHOOK: Input: default@skipftbl@b=1
+POSTHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+1
+2
+PREHOOK: query: SELECT MAX(b) FROM skipFTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skipftbl
+PREHOOK: Input: default@skipftbl@b=1
+PREHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MAX(b) FROM skipFTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skipftbl
+POSTHOOK: Input: default@skipftbl@b=1
+POSTHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+2
+PREHOOK: query: SELECT DISTINCT a FROM skipFTbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skipftbl
+PREHOOK: Input: default@skipftbl@b=1
+PREHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT DISTINCT a FROM skipFTbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skipftbl
+POSTHOOK: Input: default@skipftbl@b=1
+POSTHOOK: Input: default@skipftbl@b=2
+#### A masked pattern was here ####
+1
+2
+3
+PREHOOK: query: DROP TABLE skipHTbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@skiphtbl
+PREHOOK: Output: default@skiphtbl
+POSTHOOK: query: DROP TABLE skipHTbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@skiphtbl
+POSTHOOK: Output: default@skiphtbl
+PREHOOK: query: DROP TABLE skipFTbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@skipftbl
+PREHOOK: Output: default@skipftbl
+POSTHOOK: query: DROP TABLE skipFTbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@skipftbl
+POSTHOOK: Output: default@skipftbl