Posted to commits@hive.apache.org by jc...@apache.org on 2019/03/26 19:22:39 UTC

[hive] branch master updated (d07cf0d -> 866498a)

This is an automated email from the ASF dual-hosted git repository.

jcamacho pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git.


    from d07cf0d  HIVE-21395: Refactor HiveSemiJoinRule (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
     new d7ed17e  HIVE-21496: Automatic sizing of unordered buffer can overflow (Jesus Camacho Rodriguez, reviewed by Prasanth Jayachandran)
     new 866498a  HIVE-21497: Direct SQL exception thrown by PartitionManagementTask (Jesus Camacho Rodriguez, reviewed by Prasanth Jayachandran)

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../java/org/apache/hadoop/hive/ql/stats/StatsUtils.java  |  5 ++---
 .../clientpositive/llap/constraints_optimization.q.out    |  6 +++---
 .../clientpositive/llap/results_cache_temptable.q.out     |  8 ++++----
 .../clientpositive/perf/tez/constraints/query14.q.out     | 14 +++++++-------
 ql/src/test/results/clientpositive/perf/tez/query14.q.out | 14 +++++++-------
 .../apache/hadoop/hive/metastore/MetaStoreDirectSql.java  | 15 ++++++++++++---
 6 files changed, 35 insertions(+), 27 deletions(-)


[hive] 01/02: HIVE-21496: Automatic sizing of unordered buffer can overflow (Jesus Camacho Rodriguez, reviewed by Prasanth Jayachandran)

Posted by jc...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

commit d7ed17e27eda65dc0ea641887b6389bf0aec7e96
Author: Jesus Camacho Rodriguez <jc...@apache.org>
AuthorDate: Mon Mar 25 12:59:14 2019 -0700

    HIVE-21496: Automatic sizing of unordered buffer can overflow (Jesus Camacho Rodriguez, reviewed by Prasanth Jayachandran)
---
 .../java/org/apache/hadoop/hive/ql/stats/StatsUtils.java   |  5 ++---
 .../clientpositive/llap/constraints_optimization.q.out     |  6 +++---
 .../clientpositive/llap/results_cache_temptable.q.out      |  8 ++++----
 .../clientpositive/perf/tez/constraints/query14.q.out      | 14 +++++++-------
 ql/src/test/results/clientpositive/perf/tez/query14.q.out  | 14 +++++++-------
 5 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 6149880..46048cd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -1967,10 +1967,7 @@ public class StatsUtils {
     if (useColStats) {
       List<ColStatistics> colStats = stats.getColumnStats();
       for (ColStatistics cs : colStats) {
-        long oldNumNulls = cs.getNumNulls();
         long oldDV = cs.getCountDistint();
-        long newNumNulls = Math.round(ratio * oldNumNulls);
-        cs.setNumNulls(newNumNulls);
         if (affectedColumns.contains(cs.getColumnName())) {
           long newDV = oldDV;
 
@@ -1987,6 +1984,8 @@ public class StatsUtils {
         if (oldDV > newNumRows) {
           cs.setCountDistint(newNumRows);
         }
+        long newNumNulls = Math.round(ratio * cs.getNumNulls());
+        cs.setNumNulls(newNumNulls > newNumRows ? newNumRows: newNumNulls);
       }
       stats.setColumnStats(colStats);
       long newDataSize = StatsUtils.getDataSizeFromColumnStats(newNumRows, colStats);
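
For context, the hunk above defers the null-count scaling until after the distinct-value cap and bounds the result by newNumRows: previously numNulls was scaled independently and could end up larger than the estimated row count, which in turn could blow up the column-stats-based data size (visible as the Long.MAX_VALUE sizes corrected in the golden files below). A minimal self-contained sketch of the capping logic, using a simplified stand-in for Hive's ColStatistics (names here are illustrative, not the actual API):

    // Minimal sketch of the capping logic above; SimpleColStats is an
    // illustrative stand-in for Hive's ColStatistics, not the real class.
    public class NumNullsCapSketch {

      static class SimpleColStats {
        long numNulls;
        long countDistinct;
      }

      /** Scale stats to a new row count; no derived value may exceed it. */
      static void scaleToNewRowCount(SimpleColStats cs, double ratio, long newNumRows) {
        if (cs.countDistinct > newNumRows) {
          cs.countDistinct = newNumRows;   // DV count cannot exceed row count
        }
        // Scale nulls by the selectivity ratio, then cap at the row count
        // as well -- this is the ordering the patch introduces.
        long newNumNulls = Math.round(ratio * cs.numNulls);
        cs.numNulls = Math.min(newNumNulls, newNumRows);
      }

      public static void main(String[] args) {
        SimpleColStats cs = new SimpleColStats();
        cs.numNulls = 10;
        cs.countDistinct = 8;
        scaleToNewRowCount(cs, 0.35, 3);   // round(3.5) = 4 would exceed 3 rows
        System.out.println(cs.numNulls + " nulls, " + cs.countDistinct + " DVs");
        // prints: 3 nulls, 3 DVs
      }
    }
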
diff --git a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out
index fbdc702..afcf53f 100644
--- a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out
+++ b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out
@@ -601,12 +601,12 @@ STAGE PLANS:
                           minReductionHashAggr: 0.0
                           mode: hash
                           outputColumnNames: _col0, _col1
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
                           Reduce Output Operator
                             key expressions: _col0 (type: bigint), _col1 (type: string)
                             sort order: ++
                             Map-reduce partition columns: _col0 (type: bigint), _col1 (type: string)
-                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                            Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
                             TopN Hash Memory Usage: 0.1
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -617,7 +617,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: bigint), KEY._col1 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: _col0 (type: bigint)
                   outputColumnNames: _col0
diff --git a/ql/src/test/results/clientpositive/llap/results_cache_temptable.q.out b/ql/src/test/results/clientpositive/llap/results_cache_temptable.q.out
index 4f1e3a7..d6eb82d 100644
--- a/ql/src/test/results/clientpositive/llap/results_cache_temptable.q.out
+++ b/ql/src/test/results/clientpositive/llap/results_cache_temptable.q.out
@@ -316,10 +316,10 @@ STAGE PLANS:
                         minReductionHashAggr: 0.99
                         mode: hash
                         outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: 376 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           sort order: 
-                          Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 1 Data size: 376 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col0 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -330,10 +330,10 @@ STAGE PLANS:
                 aggregations: count(VALUE._col0)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 376 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 376 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
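
The 9223372036854775807 figures replaced above are exactly Long.MAX_VALUE, i.e. the saturated size estimate that HIVE-21496 eliminates. A quick, hypothetical illustration (not Hive code) of the two ways a long-valued size estimate can go wrong:

    // Hypothetical illustration, not Hive code: how a long-valued size
    // estimate can wrap or saturate.
    public class OverflowSketch {
      public static void main(String[] args) {
        long rows = 4_000_000_000L;
        long avgRowSize = 4_000_000_000L;
        // Unchecked long multiplication silently wraps to a negative value.
        System.out.println(rows * avgRowSize);
        // Math.round clamps an oversized double to Long.MAX_VALUE, i.e.
        // 9223372036854775807, the value seen in the old plans above.
        System.out.println(Math.round(1e30));
        // Math.multiplyExact(rows, avgRowSize) would instead throw
        // ArithmeticException, making the overflow visible.
      }
    }
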
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out
index 1a3aefe..0f48872 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out
@@ -297,21 +297,21 @@ Stage-0
     Stage-1
       Reducer 9 vectorized
       File Output Operator [FS_1350]
-        Limit [LIM_1349] (rows=7 width=192)
+        Limit [LIM_1349] (rows=7 width=212)
           Number of rows:100
-          Select Operator [SEL_1348] (rows=7 width=192)
+          Select Operator [SEL_1348] (rows=7 width=212)
             Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
           <-Reducer 8 [SIMPLE_EDGE] vectorized
             SHUFFLE [RS_1347]
-              Select Operator [SEL_1346] (rows=7 width=192)
+              Select Operator [SEL_1346] (rows=7 width=212)
                 Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
-                Group By Operator [GBY_1345] (rows=7 width=200)
+                Group By Operator [GBY_1345] (rows=7 width=220)
                   Output:["_col0","_col1","_col2","_col3","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4
                 <-Union 7 [SIMPLE_EDGE]
                   <-Reducer 16 [CONTAINS]
                     Reduce Output Operator [RS_1195]
                       PartitionCols:_col0, _col1, _col2, _col3, _col4
-                      Group By Operator [GBY_1194] (rows=7 width=200)
+                      Group By Operator [GBY_1194] (rows=7 width=220)
                         Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L
                         Top N Key Operator [TNK_1193] (rows=3 width=221)
                           keys:_col0, _col1, _col2, _col3, 0L,sort order:+++++,top n:100
@@ -683,7 +683,7 @@ Stage-0
                   <-Reducer 22 [CONTAINS]
                     Reduce Output Operator [RS_1202]
                       PartitionCols:_col0, _col1, _col2, _col3, _col4
-                      Group By Operator [GBY_1201] (rows=7 width=200)
+                      Group By Operator [GBY_1201] (rows=7 width=220)
                         Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L
                         Top N Key Operator [TNK_1200] (rows=3 width=221)
                           keys:_col0, _col1, _col2, _col3, 0L,sort order:+++++,top n:100
@@ -866,7 +866,7 @@ Stage-0
                   <-Reducer 6 [CONTAINS]
                     Reduce Output Operator [RS_1188]
                       PartitionCols:_col0, _col1, _col2, _col3, _col4
-                      Group By Operator [GBY_1187] (rows=7 width=200)
+                      Group By Operator [GBY_1187] (rows=7 width=220)
                         Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L
                         Top N Key Operator [TNK_1186] (rows=3 width=221)
                           keys:_col0, _col1, _col2, _col3, 0L,sort order:+++++,top n:100
diff --git a/ql/src/test/results/clientpositive/perf/tez/query14.q.out b/ql/src/test/results/clientpositive/perf/tez/query14.q.out
index fd8eb9b..66c4f39 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query14.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query14.q.out
@@ -297,21 +297,21 @@ Stage-0
     Stage-1
       Reducer 9 vectorized
       File Output Operator [FS_1335]
-        Limit [LIM_1334] (rows=7 width=192)
+        Limit [LIM_1334] (rows=7 width=212)
           Number of rows:100
-          Select Operator [SEL_1333] (rows=7 width=192)
+          Select Operator [SEL_1333] (rows=7 width=212)
             Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
           <-Reducer 8 [SIMPLE_EDGE] vectorized
             SHUFFLE [RS_1332]
-              Select Operator [SEL_1331] (rows=7 width=192)
+              Select Operator [SEL_1331] (rows=7 width=212)
                 Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
-                Group By Operator [GBY_1330] (rows=7 width=200)
+                Group By Operator [GBY_1330] (rows=7 width=220)
                   Output:["_col0","_col1","_col2","_col3","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4
                 <-Union 7 [SIMPLE_EDGE]
                   <-Reducer 16 [CONTAINS]
                     Reduce Output Operator [RS_1177]
                       PartitionCols:_col0, _col1, _col2, _col3, _col4
-                      Group By Operator [GBY_1176] (rows=7 width=200)
+                      Group By Operator [GBY_1176] (rows=7 width=220)
                         Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L
                         Top N Key Operator [TNK_1175] (rows=3 width=221)
                           keys:_col0, _col1, _col2, _col3, 0L,sort order:+++++,top n:100
@@ -687,7 +687,7 @@ Stage-0
                   <-Reducer 22 [CONTAINS]
                     Reduce Output Operator [RS_1184]
                       PartitionCols:_col0, _col1, _col2, _col3, _col4
-                      Group By Operator [GBY_1183] (rows=7 width=200)
+                      Group By Operator [GBY_1183] (rows=7 width=220)
                         Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L
                         Top N Key Operator [TNK_1182] (rows=3 width=221)
                           keys:_col0, _col1, _col2, _col3, 0L,sort order:+++++,top n:100
@@ -874,7 +874,7 @@ Stage-0
                   <-Reducer 6 [CONTAINS]
                     Reduce Output Operator [RS_1170]
                       PartitionCols:_col0, _col1, _col2, _col3, _col4
-                      Group By Operator [GBY_1169] (rows=7 width=200)
+                      Group By Operator [GBY_1169] (rows=7 width=220)
                         Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L
                         Top N Key Operator [TNK_1168] (rows=3 width=221)
                           keys:_col0, _col1, _col2, _col3, 0L,sort order:+++++,top n:100


[hive] 02/02: HIVE-21497: Direct SQL exception thrown by PartitionManagementTask (Jesus Camacho Rodriguez, reviewed by Prasanth Jayachandran)

Posted by jc...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

commit 866498ae0160e7475438a0b17bf3ee7b3d1a6a50
Author: Jesus Camacho Rodriguez <jc...@apache.org>
AuthorDate: Mon Mar 25 15:55:26 2019 -0700

    HIVE-21497: Direct SQL exception thrown by PartitionManagementTask (Jesus Camacho Rodriguez, reviewed by Prasanth Jayachandran)
---
 .../apache/hadoop/hive/metastore/MetaStoreDirectSql.java  | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index b0cbe01..45b89e0 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -23,7 +23,6 @@ import static org.apache.commons.lang.StringUtils.normalizeSpace;
 import static org.apache.commons.lang.StringUtils.repeat;
 import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_CATALOG_NAME;
 
-import java.net.URL;
 import java.sql.Connection;
 import java.sql.SQLException;
 import java.sql.Statement;
@@ -1173,6 +1172,7 @@ class MetaStoreDirectSql {
       // Build the filter and add parameters linearly; we are traversing leaf nodes LTR.
       String tableValue = "\"FILTER" + partColIndex + "\".\"PART_KEY_VAL\"";
 
+      String nodeValue0 = "?";
       if (node.isReverseOrder) {
         params.add(nodeValue);
       }
@@ -1206,14 +1206,23 @@ class MetaStoreDirectSql {
           params.add(table.getCatName().toLowerCase());
         }
         tableValue += " then " + tableValue0 + " else null end)";
+
+        if (valType == FilterType.Date) {
+          if (dbType == DatabaseProduct.ORACLE) {
+            // Oracle requires special treatment... as usual.
+            nodeValue0 = "TO_DATE(" + nodeValue0 + ", 'YYYY-MM-DD')";
+          } else {
+            nodeValue0 = "cast(" + nodeValue0 + " as date)";
+          }
+        }
       }
       if (!node.isReverseOrder) {
         params.add(nodeValue);
       }
 
       filterBuffer.append(node.isReverseOrder
-          ? "(? " + node.operator.getSqlOp() + " " + tableValue + ")"
-          : "(" + tableValue + " " + node.operator.getSqlOp() + " ?)");
+          ? "(" + nodeValue0 + " " + node.operator.getSqlOp() + " " + tableValue + ")"
+          : "(" + tableValue + " " + node.operator.getSqlOp() + " " + nodeValue0 + ")");
     }
   }
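
For context, the change above stops binding date-valued filter parameters against the string-typed PART_KEY_VAL expression directly: for date filters the JDBC placeholder is wrapped in TO_DATE(?, 'YYYY-MM-DD') on Oracle and cast(? as date) elsewhere, and the wrapped placeholder is used on whichever side of the operator the literal appears. A minimal sketch of the resulting predicate text, with simplified stand-ins for Hive's DatabaseProduct and FilterType (illustrative names only, not the real metastore types):

    // Illustrative sketch: builds the comparison fragment the patch generates.
    // The enum and method names are simplified stand-ins, not Hive internals.
    public class DateFilterSketch {

      enum Db { ORACLE, OTHER }

      static String placeholder(boolean isDate, Db db) {
        String p = "?";
        if (isDate) {
          // Oracle needs TO_DATE; other backends accept a standard cast.
          p = (db == Db.ORACLE)
              ? "TO_DATE(" + p + ", 'YYYY-MM-DD')"
              : "cast(" + p + " as date)";
        }
        return p;
      }

      static String predicate(String tableValue, String sqlOp, boolean isDate, Db db) {
        // In the real code, tableValue is itself a CASE expression over
        // "FILTERn"."PART_KEY_VAL"; a bare column stands in for it here.
        return "(" + tableValue + " " + sqlOp + " " + placeholder(isDate, db) + ")";
      }

      public static void main(String[] args) {
        System.out.println(predicate("\"FILTER0\".\"PART_KEY_VAL\"", "=", true, Db.OTHER));
        // ("FILTER0"."PART_KEY_VAL" = cast(? as date))
        System.out.println(predicate("\"FILTER0\".\"PART_KEY_VAL\"", "=", true, Db.ORACLE));
        // ("FILTER0"."PART_KEY_VAL" = TO_DATE(?, 'YYYY-MM-DD'))
      }
    }

Without the cast, strict backends reject comparing a varchar expression to a date parameter, which is the Direct SQL exception PartitionManagementTask was hitting.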