You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by wz...@apache.org on 2021/07/20 16:42:57 UTC

[impala] 01/03: IMPALA-10799: Analysis slowdown with inline views and thousands of columns

This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit bd9b7459d0ab453fa185ba6728e5c571835ffa3e
Author: xqhe <he...@126.com>
AuthorDate: Fri Jul 16 15:52:26 2021 +0800

    IMPALA-10799: Analysis slowdown with inline views and thousands of columns
    
    If an inline view has thousands of columns, analysis is very slow.
    Most of the cost is in the get() calls used to find expressions in the
    local substitution map when checking whether a column alias is
    ambiguous.
    
    The fix is to:
    1. Use a LinkedHashMap to look up whether we have already seen the alias.
    2. Run the checkComposedFrom() check only when the log level is TRACE,
       since the code has been mature for a while.
    
    Testing:
    Performance testing with a query with 10000 expressions of the
    following form:
      with a as (select c1 c1, c1 c2, c1 c3, ... from t)
      select c1, c2, c3, ... from a;
    Analysis of the repro query went from 7.5 sec to less than 1 sec.
    
    Change-Id: I43da47dddfdb3db6d0e2073ae974a0a4d1b3ad7c
    Reviewed-on: http://gerrit.cloudera.org:8080/17688
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java | 2 +-
 fe/src/main/java/org/apache/impala/analysis/SelectStmt.java    | 9 ++++++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java b/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java
index 0f30a40..2164bd0 100644
--- a/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java
@@ -267,8 +267,8 @@ public class InlineViewRef extends TableRef {
       LOG.trace("inline view " + getUniqueAlias() + " smap: " + smap_.debugString());
       LOG.trace("inline view " + getUniqueAlias() + " baseTblSmap: " +
           baseTblSmap_.debugString());
+      Preconditions.checkState(baseTblSmap_.checkComposedFrom(smap_));
     }
-    Preconditions.checkState(baseTblSmap_.checkComposedFrom(smap_));
 
     analyzeTableSample(analyzer);
     analyzeHints(analyzer);
diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
index e153195..e5a7fa6 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
@@ -20,7 +20,9 @@ package org.apache.impala.analysis;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Iterator;
+import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
 import org.apache.impala.analysis.Path.PathType;
@@ -326,6 +328,9 @@ public class SelectStmt extends QueryStmt {
       selectList_.analyzePlanHints(analyzer_);
 
       // populate resultExprs_, aliasSmap_, and colLabels_
+      // This additional map is used for performance reasons and not for finding
+      // ambiguous alias.
+      Map<String, Expr> existingAliasExprs = new LinkedHashMap<>();
       for (int i = 0; i < selectList_.getItems().size(); ++i) {
         SelectListItem item = selectList_.getItems().get(i);
         if (item.isStar()) {
@@ -361,11 +366,13 @@ public class SelectStmt extends QueryStmt {
           resultExprs_.add(item.getExpr());
           String label = item.toColumnLabel(i, analyzer_.useHiveColLabels());
           SlotRef aliasRef = new SlotRef(label);
-          Expr existingAliasExpr = aliasSmap_.get(aliasRef);
+          Expr existingAliasExpr = existingAliasExprs.get(label);
           if (existingAliasExpr != null && !existingAliasExpr.equals(item.getExpr())) {
             // If we have already seen this alias, it refers to more than one column and
             // therefore is ambiguous.
             ambiguousAliasList_.add(aliasRef);
+          } else {
+            existingAliasExprs.put(label, item.getExpr());
           }
           aliasSmap_.put(aliasRef, item.getExpr().clone());
           colLabels_.add(label);