You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2021/12/01 07:25:28 UTC

[impala] branch master updated (f566e7d -> b0b6e11)

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from f566e7d  IMPALA-10994: Normalize the pip package name part of download URL.
     new 6f80a1f  IMPALA-11021: Fix bug when query contains illegal predicate hints
     new b0b6e11  IMPALA-10970: Fix criterion for classifying coordinator only query

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/scheduling/scheduler.cc                     |   4 +-
 common/thrift/Query.thrift                         |   4 +-
 .../java/org/apache/impala/analysis/Analyzer.java  |  17 ++-
 .../apache/impala/analysis/AnalyzeStmtsTest.java   |  35 ++++++
 .../functional-query/queries/QueryTest/mt-dop.test | 123 +++++++++++++++++++++
 5 files changed, 180 insertions(+), 3 deletions(-)

[impala] 02/02: IMPALA-10970: Fix criterion for classifying coordinator only query

Posted by st...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit b0b6e11f53ce4a43423bdd14297cc6624d344760
Author: guojingfeng <gu...@tencent.com>
AuthorDate: Tue Oct 12 17:09:30 2021 +0800

    IMPALA-10970: Fix criterion for classifying coordinator only query
    
    This patch fixes a bug in the criterion which decided whether a query
    can be considered as a coordinator only query. It did not consider
    the possibility of parallel plans and ended up mis-classifying some
    queries as coordinator only queries.
    
    This classification was used during scheduling when dedicated
    coordinators and executor groups are used and allowed coordinator
    queries to be scheduled only on the coordinator even in the absence
    of healthy executor groups.
    
    As a result of this bug, queries classified wrongly ended up with
    error code: NO_REGISTERED_BACKENDS.
    
    Testing:
    - Add new mt_dop test case for functional_query and pass
    - Ran and passed custom_cluster/test_coordinators, test_executor_groups
    
    Change-Id: Icaaf1f1ba7a976122b4d37bd675e6d8181dc8700
    Reviewed-on: http://gerrit.cloudera.org:8080/17937
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Bikramjeet Vig <bi...@cloudera.com>
---
 be/src/scheduling/scheduler.cc                     |   4 +-
 common/thrift/Query.thrift                         |   4 +-
 .../functional-query/queries/QueryTest/mt-dop.test | 123 +++++++++++++++++++++
 3 files changed, 129 insertions(+), 2 deletions(-)

diff --git a/be/src/scheduling/scheduler.cc b/be/src/scheduling/scheduler.cc
index 4ed5007..c6cb5c0 100644
--- a/be/src/scheduling/scheduler.cc
+++ b/be/src/scheduling/scheduler.cc
@@ -825,11 +825,13 @@ Status Scheduler::Schedule(const ExecutorConfig& executor_config, ScheduleState*
 
 bool Scheduler::IsCoordinatorOnlyQuery(const TQueryExecRequest& exec_request) {
   DCHECK_GT(exec_request.plan_exec_info.size(), 0);
+  int64_t num_parallel_plans = exec_request.plan_exec_info.size();
   const TPlanExecInfo& plan_exec_info = exec_request.plan_exec_info[0];
   int64_t num_fragments = plan_exec_info.fragments.size();
   DCHECK_GT(num_fragments, 0);
   auto type = plan_exec_info.fragments[0].partition.type;
-  return num_fragments == 1 && type == TPartitionType::UNPARTITIONED;
+  return num_parallel_plans == 1 && num_fragments == 1
+      && type == TPartitionType::UNPARTITIONED;
 }
 
 void Scheduler::ComputeBackendExecParams(
diff --git a/common/thrift/Query.thrift b/common/thrift/Query.thrift
index 80f3927..5879864 100644
--- a/common/thrift/Query.thrift
+++ b/common/thrift/Query.thrift
@@ -785,7 +785,9 @@ struct TFinalizeParams {
 
 // Result of call to ImpalaPlanService/JniFrontend.CreateQueryRequest()
 struct TQueryExecRequest {
-  // exec info for all plans; the first one materializes the query result
+  // exec info for all plans; the first one materializes the query result, and subsequent
+  // plans materialize the build sides of joins. Each plan appears before its
+  // dependencies in the list.
   1: optional list<TPlanExecInfo> plan_exec_info
 
   // Metadata of the query result set (only for select)
diff --git a/testdata/workloads/functional-query/queries/QueryTest/mt-dop.test b/testdata/workloads/functional-query/queries/QueryTest/mt-dop.test
index a46693a..faedcce 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/mt-dop.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/mt-dop.test
@@ -26,3 +26,126 @@ int
 6
 7
 ====
+---- QUERY
+# IMPALA-10970: Verify that a query plan containing 2 parallel plans
+# with the first one having a single unpartitioned fragment is
+# correctly classified as not being a coordinator only query. This
+# query create a plan with the aforementioned condition and has a
+# join build fragment that will be co-located with the root sink
+# fragment(IMPALMA-4224). This ensures that the query fails to
+# schedule and eventually times out in the queue if incorrectly
+# classified as a coordinator only query.
+# To ensure the inline left table placed at join probe side, we add
+# STRAIGHT_JOIN hint to disable join inversion optimization.
+SELECT STRAIGHT_JOIN t2.int_col
+FROM
+    ( SELECT '20210831' AS ts
+     UNION ALL SELECT '20210901' AS ts
+     UNION ALL SELECT '20210902' AS ts
+     UNION ALL SELECT '20210903' AS ts ) t1
+CROSS JOIN functional.alltypes t2
+LIMIT 100;
+---- RESULTS
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+---- TYPES
+INT
+====
\ No newline at end of file

[impala] 01/02: IMPALA-11021: Fix bug when query contains illegal predicate hints

Posted by st...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 6f80a1f16015b628a31548bdbecd61a554fc0d85
Author: skyyws <sk...@163.com>
AuthorDate: Fri Nov 19 10:27:06 2021 +0800

    IMPALA-11021: Fix bug when query contains illegal predicate hints
    
    Currently Impala support predicate hint: ALWAYS_TRUE, we can use this
    hint after where keyword. If we use illegal hints carelessly, query
    will throw IllegalStateException which is not expected. Query should
    return normal results with a warning instead of a exception. This is
    due to the condition check in Analyzer.addWarning().
    After create TExecRequest and initialize it, Impala will get warnings
    from 'GlobalState.warnings', and 'GlobalState.warningsRetrieved' will
    be set to 'true' then. But after this, Impala will substitute predicate
    by clone(), and analyze new predicate in later phase. New predicate
    analyze will add hint warning to 'GlobalState.warnings', but failed and
    throw IllegalStateException due to 'globalState_.warningsRetrieved'
    check failed which is expected as 'false'.
    This check is added in IMPALA-4166, I removed original condition check
    and added a new check to ensure that all warnings for new/substituted
    predicates are already exists in 'globalState_.warnings'. And this will
    also avoiding exception caused by illegal hints.
    
    Testing:
    - Added new fe tests in 'AnalyzeStmtsTest'
    
    Change-Id: Id719bc4280c811456333eb4b4ec5bc9cb8bae128
    Reviewed-on: http://gerrit.cloudera.org:8080/18040
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../java/org/apache/impala/analysis/Analyzer.java  | 17 ++++++++++-
 .../apache/impala/analysis/AnalyzeStmtsTest.java   | 35 ++++++++++++++++++++++
 2 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index 3b95bcd..6b5d137 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -3324,7 +3324,8 @@ public class Analyzer {
    * getWarnings() has been called, no warning may be added to the Analyzer anymore.
    */
   public void addWarning(String msg) {
-    Preconditions.checkState(!globalState_.warningsRetrieved);
+    if (checkWarningsRetrieved(msg)) return;
+
     if (msg == null) return;
     Integer count = globalState_.warnings.get(msg);
     if (count == null) count = 0;
@@ -3332,6 +3333,20 @@ public class Analyzer {
   }
 
   /**
+   * 'addWarning' method may be called after the warnings are retrieved, e.g. in
+   * analyzing some substituted/cloned predicates (IMPALA-11021). We need to make sure
+   * no new warnings are added after retrieved.
+   */
+  private boolean checkWarningsRetrieved(String msg) {
+    if (globalState_.warningsRetrieved) {
+      // Make sure that substituted/cloned predicates' warnings already exists in map.
+      Preconditions.checkState(globalState_.warnings.containsKey(msg));
+      return true;
+    }
+    return false;
+  }
+
+  /**
    * Registers a new PrivilegeRequest in the analyzer.
    */
   public void registerPrivReq(PrivilegeRequest privReq) {
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
index 5c38d37..f0c31e2 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
@@ -4966,4 +4966,39 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
         tblProperties,
         "Only BUCKET and TRUNCATE partition transforms accept a parameter.");
   }
+
+  @Test
+  public void testPredicateHint() {
+    // Legal hint return correct results without warning, we currently only support
+    // 'ALWAYS_TRUE' hint.
+    AnalyzesOk("select * from tpch.lineitem where /* +ALWAYS_TRUE */ " +
+            "l_shipdate <= (select '1998-09-02')");
+
+    // Illegal hint return correct results with a warning.
+    AnalyzesOk("select * from tpch.lineitem where /* +ALWAYS_TRUE_TEST */ " +
+            "l_shipdate <= (select '1998-09-02')",
+        "Predicate hint not recognized: ALWAYS_TRUE_TEST");
+    AnalyzesOk("select * from tpch.lineitem where /* +ILLEGAL_HINT_TEST */ " +
+            "l_shipdate <= (select '1998-09-02')",
+        "Predicate hint not recognized: ILLEGAL_HINT_TEST");
+
+    // Multiple hints with legal and illegal hints also output a warning.
+    AnalyzesOk("select * from tpch.lineitem where /* +ALWAYS_TRUE,ILLEGAL_HINT_TEST */ "
+            + "l_shipdate <= (select '1998-09-02')",
+        "Predicate hint not recognized: ILLEGAL_HINT_TEST");
+
+    // Multiple illegal hints will output each hint warnings.
+    AnalyzesOk("select * from tpch.lineitem where " +
+            "/* +ILLEGAL_HINT_TEST1,ILLEGAL_HINT_TEST2,ILLEGAL_HINT_TEST3 */ " +
+            "l_shipdate <= (select '1998-09-02')",
+        "Predicate hint not recognized: ILLEGAL_HINT_TEST1");
+    AnalyzesOk("select * from tpch.lineitem where " +
+            "/* +ILLEGAL_HINT_TEST1,ILLEGAL_HINT_TEST2,ILLEGAL_HINT_TEST3 */ " +
+            "l_shipdate <= (select '1998-09-02')",
+        "Predicate hint not recognized: ILLEGAL_HINT_TEST2");
+    AnalyzesOk("select * from tpch.lineitem where " +
+            "/* +ILLEGAL_HINT_TEST1,ILLEGAL_HINT_TEST2,ILLEGAL_HINT_TEST3 */ " +
+            "l_shipdate <= (select '1998-09-02')",
+        "Predicate hint not recognized: ILLEGAL_HINT_TEST3");
+  }
 }