You are viewing a plain text version of this content. The canonical link to the original was a hyperlink that is not preserved in this plain text version.
Posted to commits@hive.apache.org by kg...@apache.org on 2018/08/22 16:21:50 UTC

hive git commit: HIVE-18624: Parsing time is extremely high (~10 min) for queries with complex select expressions (Zoltan Haindrich reviewed by Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master 09c63601b -> 4408661c0


HIVE-18624: Parsing time is extremely high (~10 min) for queries with complex select expressions (Zoltan Haindrich reviewed by Ashutosh Chauhan)

Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4408661c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4408661c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4408661c

Branch: refs/heads/master
Commit: 4408661c0501bf1e7991e144f65b49732f4c641b
Parents: 09c6360
Author: Zoltan Haindrich <ki...@rxd.hu>
Authored: Wed Aug 22 18:20:41 2018 +0200
Committer: Zoltan Haindrich <ki...@rxd.hu>
Committed: Wed Aug 22 18:20:41 2018 +0200

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/IdentifiersParser.g    |   2 +-
 .../hadoop/hive/ql/parse/TestParseDriver.java   | 100 +++++++++++++++++++
 .../hive/ql/parse/TestParseDriverIntervals.java |   3 +-
 .../clientnegative/char_pad_convert_fail2.q.out |   2 +-
 .../ptf_negative_DistributeByOrderBy.q.out      |   3 +-
 .../ptf_negative_PartitionBySortBy.q.out        |   3 +-
 .../clientnegative/ptf_window_boundaries.q.out  |   2 +-
 .../clientnegative/ptf_window_boundaries2.q.out |   2 +-
 8 files changed, 110 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/4408661c/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 9e43ad5..2566b31 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -431,7 +431,7 @@ atomExpression
     | whenExpression
     | (subQueryExpression)=> (subQueryExpression)
         -> ^(TOK_SUBQUERY_EXPR TOK_SUBQUERY_OP subQueryExpression)
-    | (function) => function
+    | (functionName LPAREN) => function
     | tableOrColumn
     | expressionsInParenthesis[true, false]
     ;

http://git-wip-us.apache.org/repos/asf/hive/blob/4408661c/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriver.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriver.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriver.java
index 7c64af3..c5d099d 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriver.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriver.java
@@ -19,13 +19,22 @@ package org.apache.hadoop.hive.ql.parse;
 
 import static org.junit.Assert.assertEquals;
 
+import org.junit.FixMethodOrder;
 import org.junit.Test;
+import org.junit.runners.MethodSorters;
 
 
+@FixMethodOrder(MethodSorters.NAME_ASCENDING)
 public class TestParseDriver {
   ParseDriver parseDriver = new ParseDriver();
 
   @Test
+  public void atFirstWarmup() throws Exception {
+    // this test method is here to do an initial call to parsedriver; and prevent any tests with timeouts to be the first.
+    parseDriver.parse("select 1");
+  }
+
+  @Test
   public void testParse() throws Exception {
     String selectStr = "select field1, field2, sum(field3+field4)";
     String whereStr = "field5=1 and field6 in ('a', 'b')";
@@ -114,4 +123,95 @@ public class TestParseDriver {
       assertTree((ASTNode) astNode1.getChild(i), (ASTNode) astNode2.getChild(i));
     }
   }
+
+  @Test(timeout = 1000)
+  public void testNestedFunctionCalls() throws Exception {
+    // Expectation here is not to run into a timeout
+    parseDriver.parse(
+        "select greatest(1,greatest(1,greatest(1,greatest(1,greatest(1,greatest(1,greatest(1,"
+            + "greatest(1,greatest(1,greatest(1,greatest(1,greatest(1,greatest(1,greatest(1,"
+            + "greatest(1,greatest(1,(greatest(1,greatest(1,2)))))))))))))))))))");
+  }
+
+  @Test(timeout = 1000)
+  public void testHIVE18624() throws Exception {
+    // Expectation here is not to run into a timeout
+    parseDriver.parse("EXPLAIN\n" +
+        "SELECT DISTINCT\n" +
+        "\n" +
+        "\n" +
+        "  IF(lower('a') <= lower('a')\n" +
+        "  ,'a'\n" +
+        "  ,IF(('a' IS NULL AND from_unixtime(UNIX_TIMESTAMP()) <= 'a')\n" +
+        "  ,'a'\n" +
+        "  ,IF(if('a' = 'a', TRUE, FALSE) = 1\n" +
+        "  ,'a'\n" +
+        "  ,IF(('a' = 1 and lower('a') NOT IN ('a', 'a')\n" +
+        "       and lower(if('a' = 'a','a','a')) <= lower('a'))\n" +
+        "      OR ('a' like 'a' OR 'a' like 'a')\n" +
+        "      OR 'a' in ('a','a')\n" +
+        "  ,'a'\n" +
+        "  ,IF(if(lower('a') in ('a', 'a') and 'a'='a', TRUE, FALSE) = 1\n" +
+        "  ,'a'\n" +
+        "  ,IF('a'='a' and unix_timestamp(if('a' = 'a',cast('a' as string),coalesce('a',cast('a' as string),from_unixtime(unix_timestamp())))) <= unix_timestamp(concat_ws('a',cast(lower('a') as string),'00:00:00')) + 9*3600\n"
+        +
+        "  ,'a'\n" +
+        "\n" +
+        "  ,If(lower('a') <= lower('a')\n" +
+        "      and if(lower('a') in ('a', 'a') and 'a'<>'a', TRUE, FALSE) <> 1\n" +
+        "  ,'a'\n" +
+        "  ,IF('a'=1 AND 'a'=1\n" +
+        "  ,'a'\n" +
+        "  ,IF('a' = 1 and COALESCE(cast('a' as int),0) = 0\n" +
+        "  ,'a'\n" +
+        "  ,IF('a' = 'a'\n" +
+        "  ,'a'\n" +
+        "\n" +
+        "  ,If('a' = 'a' AND lower('a')>lower(if(lower('a')<1830,'a',cast(date_add('a',1) as timestamp)))\n" +
+        "  ,'a'\n" +
+        "\n" +
+        "\n" +
+        "\n" +
+        "  ,IF('a' = 1\n" +
+        "\n" +
+        "  ,IF('a' in ('a', 'a') and ((unix_timestamp('a')-unix_timestamp('a')) / 60) > 30 and 'a' = 1\n" +
+        "\n" +
+        "\n" +
+        "  ,'a', 'a')\n" +
+        "\n" +
+        "\n" +
+        "  ,IF(if('a' = 'a', FALSE, TRUE ) = 1 AND 'a' IS NULL\n" +
+        "  ,'a'\n" +
+        "  ,IF('a' = 1 and 'a'>0\n" +
+        "  , 'a'\n" +
+        "\n" +
+        "  ,IF('a' = 1 AND 'a' ='a'\n" +
+        "  ,'a'\n" +
+        "  ,IF('a' is not null and 'a' is not null and 'a' > 'a'\n" +
+        "  ,'a'\n" +
+        "  ,IF('a' = 1\n" +
+        "  ,'a'\n" +
+        "\n" +
+        "  ,IF('a' = 'a'\n" +
+        "  ,'a'\n" +
+        "\n" +
+        "  ,If('a' = 1\n" +
+        "  ,'a'\n" +
+        "  ,IF('a' = 1\n" +
+        "  ,'a'\n" +
+        "  ,IF('a' = 1\n" +
+        "  ,'a'\n" +
+        "\n" +
+        "  ,IF('a' ='a' and 'a' ='a' and cast(unix_timestamp('a') as  int) + 93600 < cast(unix_timestamp()  as int)\n" +
+        "  ,'a'\n" +
+        "  ,IF('a' = 'a'\n" +
+        "  ,'a'\n" +
+        "  ,IF('a' = 'a' and 'a' in ('a','a','a')\n" +
+        "  ,'a'\n" +
+        "  ,IF('a' = 'a'\n" +
+        "  ,'a','a'))\n" +
+        "      )))))))))))))))))))))))\n" +
+        "AS test_comp_exp");
+  }
+
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/4408661c/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriverIntervals.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriverIntervals.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriverIntervals.java
index 4e0efe8..fdc4cfa 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriverIntervals.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestParseDriverIntervals.java
@@ -74,8 +74,9 @@ public class TestParseDriverIntervals {
     if (children != null) {
       for (Node c : children) {
         ASTNode r = findFunctionNode((ASTNode) c);
-        if (r != null)
+        if (r != null) {
           return r;
+        }
       }
     }
     return null;

http://git-wip-us.apache.org/repos/asf/hive/blob/4408661c/ql/src/test/results/clientnegative/char_pad_convert_fail2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/char_pad_convert_fail2.q.out b/ql/src/test/results/clientnegative/char_pad_convert_fail2.q.out
index b72a990..3dd8aef 100644
--- a/ql/src/test/results/clientnegative/char_pad_convert_fail2.q.out
+++ b/ql/src/test/results/clientnegative/char_pad_convert_fail2.q.out
@@ -40,4 +40,4 @@ POSTHOOK: query: load data local inpath '../../data/files/over1k' into table ove
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@over1k
-FAILED: ParseException line 7:11 cannot recognize input near 'lpad' '(' '{' in expression specification
+FAILED: ParseException line 7:12 cannot recognize input near '{' '"key1"' ':' in function specification

http://git-wip-us.apache.org/repos/asf/hive/blob/4408661c/ql/src/test/results/clientnegative/ptf_negative_DistributeByOrderBy.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/ptf_negative_DistributeByOrderBy.q.out b/ql/src/test/results/clientnegative/ptf_negative_DistributeByOrderBy.q.out
index ed24fe5..4237f85 100644
--- a/ql/src/test/results/clientnegative/ptf_negative_DistributeByOrderBy.q.out
+++ b/ql/src/test/results/clientnegative/ptf_negative_DistributeByOrderBy.q.out
@@ -1 +1,2 @@
-FAILED: ParseException line 4:3 cannot recognize input near 'sum' '(' 'p_retailprice' in expression specification
+FAILED: ParseException line 4:46 missing ) at 'order' near 'p_mfgr'
+line 4:61 missing EOF at ')' near 'p_mfgr'

http://git-wip-us.apache.org/repos/asf/hive/blob/4408661c/ql/src/test/results/clientnegative/ptf_negative_PartitionBySortBy.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/ptf_negative_PartitionBySortBy.q.out b/ql/src/test/results/clientnegative/ptf_negative_PartitionBySortBy.q.out
index ed24fe5..d662f42 100644
--- a/ql/src/test/results/clientnegative/ptf_negative_PartitionBySortBy.q.out
+++ b/ql/src/test/results/clientnegative/ptf_negative_PartitionBySortBy.q.out
@@ -1 +1,2 @@
-FAILED: ParseException line 4:3 cannot recognize input near 'sum' '(' 'p_retailprice' in expression specification
+FAILED: ParseException line 4:45 missing ) at 'sort' near 'p_mfgr'
+line 4:59 missing EOF at ')' near 'p_mfgr'

http://git-wip-us.apache.org/repos/asf/hive/blob/4408661c/ql/src/test/results/clientnegative/ptf_window_boundaries.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/ptf_window_boundaries.q.out b/ql/src/test/results/clientnegative/ptf_window_boundaries.q.out
index 4d2cac0..377f927 100644
--- a/ql/src/test/results/clientnegative/ptf_window_boundaries.q.out
+++ b/ql/src/test/results/clientnegative/ptf_window_boundaries.q.out
@@ -1 +1 @@
-FAILED: ParseException line 3:7 cannot recognize input near 'sum' '(' 'p_retailprice' in expression specification
+FAILED: ParseException line 3:44 mismatched input 'following' expecting KW_PRECEDING near 'unbounded' in windowframestartboundary

http://git-wip-us.apache.org/repos/asf/hive/blob/4408661c/ql/src/test/results/clientnegative/ptf_window_boundaries2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/ptf_window_boundaries2.q.out b/ql/src/test/results/clientnegative/ptf_window_boundaries2.q.out
index 4d2cac0..e2ac3d6 100644
--- a/ql/src/test/results/clientnegative/ptf_window_boundaries2.q.out
+++ b/ql/src/test/results/clientnegative/ptf_window_boundaries2.q.out
@@ -1 +1 @@
-FAILED: ParseException line 3:7 cannot recognize input near 'sum' '(' 'p_retailprice' in expression specification
+FAILED: ParseException line 3:45 mismatched input 'following' expecting KW_PRECEDING near 'unbounded' in windowframestartboundary