You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kg...@apache.org on 2019/04/02 11:32:48 UTC

[hive] branch branch-2.3 updated: HIVE-18624: Parsing time is extremely high (~10 min) for queries with complex select expressions (Zoltan Haindrich reviewed by Ashutosh Chauhan)

This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch branch-2.3
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/branch-2.3 by this push:
     new a2cdcf2  HIVE-18624: Parsing time is extremely high (~10 min) for queries with complex select expressions (Zoltan Haindrich reviewed by Ashutosh Chauhan)
a2cdcf2 is described below

commit a2cdcf2bd9cbad9c78a66057298b360033aea54b
Author: Zoltan Haindrich <ki...@rxd.hu>
AuthorDate: Wed Aug 22 18:20:41 2018 +0200

    HIVE-18624: Parsing time is extremely high (~10 min) for queries with complex select expressions (Zoltan Haindrich reviewed by Ashutosh Chauhan)
    
    Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>
    (cherry picked from commit 4408661c0501bf1e7991e144f65b49732f4c641b)
    (cherry picked from commit a4b913360d6086b5da8d1c84a2d3cfd847131056)
---
 .../hadoop/hive/ql/parse/IdentifiersParser.g       |   2 +-
 .../hadoop/hive/ql/parse/TestParseDriver.java      | 100 +++++++++++++++++++++
 .../hive/ql/parse/TestParseDriverIntervals.java    |   3 +-
 .../clientnegative/char_pad_convert_fail2.q.out    |   2 +-
 .../ptf_negative_DistributeByOrderBy.q.out         |   3 +-
 .../ptf_negative_PartitionBySortBy.q.out           |   3 +-
 .../clientnegative/ptf_window_boundaries.q.out     |   2 +-
 .../clientnegative/ptf_window_boundaries2.q.out    |   2 +-
 8 files changed, 110 insertions(+), 7 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 8c4ee8a..071676a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -420,7 +420,7 @@ atomExpression
     | whenExpression
     | (subQueryExpression)=> (subQueryExpression)
         -> ^(TOK_SUBQUERY_EXPR TOK_SUBQUERY_OP subQueryExpression)
-    | (function) => function
+    | (functionName LPAREN) => function
     | tableOrColumn
     | expressionsInParenthesis[true]
     ;
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriver.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriver.java
index cd9db19..827921d 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriver.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriver.java
@@ -19,13 +19,22 @@ package org.apache.hadoop.hive.ql.parse;
 
 import static org.junit.Assert.assertEquals;
 
+import org.junit.FixMethodOrder;
 import org.junit.Test;
+import org.junit.runners.MethodSorters;
 
 
+@FixMethodOrder(MethodSorters.NAME_ASCENDING)
 public class TestParseDriver {
   ParseDriver parseDriver = new ParseDriver();
 
   @Test
+  public void atFirstWarmup() throws Exception {
+    // This method makes an initial (warm-up) call to ParseDriver so that none of the tests with timeouts is the first to run.
+    parseDriver.parse("select 1");
+  }
+
+  @Test
   public void testParse() throws Exception {
     String selectStr = "select field1, field2, sum(field3+field4)";
     String whereStr = "field5=1 and field6 in ('a', 'b')";
@@ -114,4 +123,95 @@ public class TestParseDriver {
       assertTree((ASTNode) astNode1.getChild(i), (ASTNode) astNode2.getChild(i));
     }
   }
+
+  @Test(timeout = 1000)
+  public void testNestedFunctionCalls() throws Exception {
+    // The expectation here is that parsing finishes without running into the timeout
+    parseDriver.parse(
+        "select greatest(1,greatest(1,greatest(1,greatest(1,greatest(1,greatest(1,greatest(1,"
+            + "greatest(1,greatest(1,greatest(1,greatest(1,greatest(1,greatest(1,greatest(1,"
+            + "greatest(1,greatest(1,(greatest(1,greatest(1,2)))))))))))))))))))");
+  }
+
+  @Test(timeout = 1000)
+  public void testHIVE18624() throws Exception {
+    // The expectation here is that parsing finishes without running into the timeout
+    parseDriver.parse("EXPLAIN\n" +
+        "SELECT DISTINCT\n" +
+        "\n" +
+        "\n" +
+        "  IF(lower('a') <= lower('a')\n" +
+        "  ,'a'\n" +
+        "  ,IF(('a' IS NULL AND from_unixtime(UNIX_TIMESTAMP()) <= 'a')\n" +
+        "  ,'a'\n" +
+        "  ,IF(if('a' = 'a', TRUE, FALSE) = 1\n" +
+        "  ,'a'\n" +
+        "  ,IF(('a' = 1 and lower('a') NOT IN ('a', 'a')\n" +
+        "       and lower(if('a' = 'a','a','a')) <= lower('a'))\n" +
+        "      OR ('a' like 'a' OR 'a' like 'a')\n" +
+        "      OR 'a' in ('a','a')\n" +
+        "  ,'a'\n" +
+        "  ,IF(if(lower('a') in ('a', 'a') and 'a'='a', TRUE, FALSE) = 1\n" +
+        "  ,'a'\n" +
+        "  ,IF('a'='a' and unix_timestamp(if('a' = 'a',cast('a' as string),coalesce('a',cast('a' as string),from_unixtime(unix_timestamp())))) <= unix_timestamp(concat_ws('a',cast(lower('a') as string),'00:00:00')) + 9*3600\n"
+        +
+        "  ,'a'\n" +
+        "\n" +
+        "  ,If(lower('a') <= lower('a')\n" +
+        "      and if(lower('a') in ('a', 'a') and 'a'<>'a', TRUE, FALSE) <> 1\n" +
+        "  ,'a'\n" +
+        "  ,IF('a'=1 AND 'a'=1\n" +
+        "  ,'a'\n" +
+        "  ,IF('a' = 1 and COALESCE(cast('a' as int),0) = 0\n" +
+        "  ,'a'\n" +
+        "  ,IF('a' = 'a'\n" +
+        "  ,'a'\n" +
+        "\n" +
+        "  ,If('a' = 'a' AND lower('a')>lower(if(lower('a')<1830,'a',cast(date_add('a',1) as timestamp)))\n" +
+        "  ,'a'\n" +
+        "\n" +
+        "\n" +
+        "\n" +
+        "  ,IF('a' = 1\n" +
+        "\n" +
+        "  ,IF('a' in ('a', 'a') and ((unix_timestamp('a')-unix_timestamp('a')) / 60) > 30 and 'a' = 1\n" +
+        "\n" +
+        "\n" +
+        "  ,'a', 'a')\n" +
+        "\n" +
+        "\n" +
+        "  ,IF(if('a' = 'a', FALSE, TRUE ) = 1 AND 'a' IS NULL\n" +
+        "  ,'a'\n" +
+        "  ,IF('a' = 1 and 'a'>0\n" +
+        "  , 'a'\n" +
+        "\n" +
+        "  ,IF('a' = 1 AND 'a' ='a'\n" +
+        "  ,'a'\n" +
+        "  ,IF('a' is not null and 'a' is not null and 'a' > 'a'\n" +
+        "  ,'a'\n" +
+        "  ,IF('a' = 1\n" +
+        "  ,'a'\n" +
+        "\n" +
+        "  ,IF('a' = 'a'\n" +
+        "  ,'a'\n" +
+        "\n" +
+        "  ,If('a' = 1\n" +
+        "  ,'a'\n" +
+        "  ,IF('a' = 1\n" +
+        "  ,'a'\n" +
+        "  ,IF('a' = 1\n" +
+        "  ,'a'\n" +
+        "\n" +
+        "  ,IF('a' ='a' and 'a' ='a' and cast(unix_timestamp('a') as  int) + 93600 < cast(unix_timestamp()  as int)\n" +
+        "  ,'a'\n" +
+        "  ,IF('a' = 'a'\n" +
+        "  ,'a'\n" +
+        "  ,IF('a' = 'a' and 'a' in ('a','a','a')\n" +
+        "  ,'a'\n" +
+        "  ,IF('a' = 'a'\n" +
+        "  ,'a','a'))\n" +
+        "      )))))))))))))))))))))))\n" +
+        "AS test_comp_exp");
+  }
+
 }
\ No newline at end of file
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriverIntervals.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriverIntervals.java
index 98ad12a..dd25f51 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriverIntervals.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriverIntervals.java
@@ -74,8 +74,9 @@ public class TestParseDriverIntervals {
     if (children != null) {
       for (Node c : children) {
         ASTNode r = findFunctionNode((ASTNode) c);
-        if (r != null)
+        if (r != null) {
           return r;
+        }
       }
     }
     return null;
diff --git a/ql/src/test/results/clientnegative/char_pad_convert_fail2.q.out b/ql/src/test/results/clientnegative/char_pad_convert_fail2.q.out
index 2780b7c..90f9356 100644
--- a/ql/src/test/results/clientnegative/char_pad_convert_fail2.q.out
+++ b/ql/src/test/results/clientnegative/char_pad_convert_fail2.q.out
@@ -40,4 +40,4 @@ POSTHOOK: query: load data local inpath '../../data/files/over1k' into table ove
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@over1k
-FAILED: ParseException line 7:11 cannot recognize input near 'lpad' '(' '{' in expression specification
+FAILED: ParseException line 7:12 cannot recognize input near '{' '"key1"' ':' in function specification
diff --git a/ql/src/test/results/clientnegative/ptf_negative_DistributeByOrderBy.q.out b/ql/src/test/results/clientnegative/ptf_negative_DistributeByOrderBy.q.out
index e8e8580..cb7f154 100644
--- a/ql/src/test/results/clientnegative/ptf_negative_DistributeByOrderBy.q.out
+++ b/ql/src/test/results/clientnegative/ptf_negative_DistributeByOrderBy.q.out
@@ -1 +1,2 @@
-FAILED: ParseException line 3:3 cannot recognize input near 'sum' '(' 'p_retailprice' in expression specification
+FAILED: ParseException line 3:46 missing ) at 'order' near 'p_mfgr'
+line 3:61 missing EOF at ')' near 'p_mfgr'
diff --git a/ql/src/test/results/clientnegative/ptf_negative_PartitionBySortBy.q.out b/ql/src/test/results/clientnegative/ptf_negative_PartitionBySortBy.q.out
index e8e8580..5daf86c 100644
--- a/ql/src/test/results/clientnegative/ptf_negative_PartitionBySortBy.q.out
+++ b/ql/src/test/results/clientnegative/ptf_negative_PartitionBySortBy.q.out
@@ -1 +1,2 @@
-FAILED: ParseException line 3:3 cannot recognize input near 'sum' '(' 'p_retailprice' in expression specification
+FAILED: ParseException line 3:45 missing ) at 'sort' near 'p_mfgr'
+line 3:59 missing EOF at ')' near 'p_mfgr'
diff --git a/ql/src/test/results/clientnegative/ptf_window_boundaries.q.out b/ql/src/test/results/clientnegative/ptf_window_boundaries.q.out
index c9c1c6d..c76feee 100644
--- a/ql/src/test/results/clientnegative/ptf_window_boundaries.q.out
+++ b/ql/src/test/results/clientnegative/ptf_window_boundaries.q.out
@@ -1 +1 @@
-FAILED: ParseException line 2:7 cannot recognize input near 'sum' '(' 'p_retailprice' in expression specification
+FAILED: ParseException line 2:44 mismatched input 'following' expecting KW_PRECEDING near 'unbounded' in windowframestartboundary
diff --git a/ql/src/test/results/clientnegative/ptf_window_boundaries2.q.out b/ql/src/test/results/clientnegative/ptf_window_boundaries2.q.out
index c9c1c6d..9ed8be5 100644
--- a/ql/src/test/results/clientnegative/ptf_window_boundaries2.q.out
+++ b/ql/src/test/results/clientnegative/ptf_window_boundaries2.q.out
@@ -1 +1 @@
-FAILED: ParseException line 2:7 cannot recognize input near 'sum' '(' 'p_retailprice' in expression specification
+FAILED: ParseException line 2:45 mismatched input 'following' expecting KW_PRECEDING near 'unbounded' in windowframestartboundary