You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@impala.apache.org by st...@apache.org on 2022/04/05 08:57:36 UTC

[impala] branch master updated (efba58f5f -> 85ddd27b6)

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


    from efba58f5f IMPALA-10737: Optimize the number of Iceberg API Metadata requests
     new 32e2ace38 IMPALA-11038: Zipping unnest from view
     new 85ddd27b6 IMPALA-11204: Template implementation for OrcStringColumnReader::ReadValue

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/exec/orc-column-readers.cc                  |  78 +++-
 be/src/exec/orc-column-readers.h                   |  21 +-
 be/src/exec/subplan-node.cc                        |   2 +-
 be/src/exec/unnest-node.cc                         |   2 +-
 be/src/exec/unnest-node.h                          |   2 +-
 .../java/org/apache/impala/analysis/Analyzer.java  |  47 ++-
 .../apache/impala/analysis/CollectionTableRef.java |   9 +
 .../org/apache/impala/analysis/FromClause.java     |   4 +
 .../org/apache/impala/analysis/SelectStmt.java     |  93 ++++-
 .../java/org/apache/impala/analysis/SlotRef.java   |  22 +-
 .../java/org/apache/impala/analysis/TableRef.java  |   3 +
 .../org/apache/impala/analysis/UnnestExpr.java     |  85 +++-
 .../org/apache/impala/planner/HdfsScanNode.java    |   4 +-
 .../java/org/apache/impala/planner/PlanNode.java   |  31 ++
 .../apache/impala/planner/SingleNodePlanner.java   |   2 +
 .../apache/impala/planner/SingularRowSrcNode.java  |   3 +
 .../java/org/apache/impala/planner/UnnestNode.java |   7 +
 .../org/apache/impala/planner/PlannerTest.java     |   8 +
 .../queries/PlannerTest/zipping-unnest.test        | 206 ++++++++++
 .../QueryTest/zipping-unnest-from-view.test        | 428 ++++++++++++++++++++-
 .../QueryTest/zipping-unnest-in-select-list.test   |  36 +-
 21 files changed, 1040 insertions(+), 53 deletions(-)
 create mode 100644 testdata/workloads/functional-planner/queries/PlannerTest/zipping-unnest.test

[impala] 01/02: IMPALA-11038: Zipping unnest from view

Posted by st...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 32e2ace38df03b6a760d636154c0ca78304568de
Author: Gabor Kaszab <ga...@cloudera.com>
AuthorDate: Thu Nov 18 17:38:37 2021 +0100

    IMPALA-11038: Zipping unnest from view
    
    IMPALA-10920 introduced zipping unnest functionality for arrays that
    are in a table. This patch improves that support further by accepting
    inputs from views as well.
    
    Testing:
     - Added planner tests to verify which execution node handles the
       predicates on unnested items.
     - E2E tests for both unnesting syntaxes (select list and from clause)
       to cover when the source of the unnested arrays is not a table but a
       view. Also tested multi-level views and filtering the unnested items
       on different levels.
    
    Change-Id: I68f649dda9e41f257e7f6596193d07b24049f92a
    Reviewed-on: http://gerrit.cloudera.org:8080/18094
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Gabor Kaszab <ga...@cloudera.com>
---
 be/src/exec/subplan-node.cc                        |   2 +-
 be/src/exec/unnest-node.cc                         |   2 +-
 be/src/exec/unnest-node.h                          |   2 +-
 .../java/org/apache/impala/analysis/Analyzer.java  |  47 ++-
 .../apache/impala/analysis/CollectionTableRef.java |   9 +
 .../org/apache/impala/analysis/FromClause.java     |   4 +
 .../org/apache/impala/analysis/SelectStmt.java     |  93 ++++-
 .../java/org/apache/impala/analysis/SlotRef.java   |  22 +-
 .../java/org/apache/impala/analysis/TableRef.java  |   3 +
 .../org/apache/impala/analysis/UnnestExpr.java     |  85 +++-
 .../org/apache/impala/planner/HdfsScanNode.java    |   4 +-
 .../java/org/apache/impala/planner/PlanNode.java   |  31 ++
 .../apache/impala/planner/SingleNodePlanner.java   |   2 +
 .../apache/impala/planner/SingularRowSrcNode.java  |   3 +
 .../java/org/apache/impala/planner/UnnestNode.java |   7 +
 .../org/apache/impala/planner/PlannerTest.java     |   8 +
 .../queries/PlannerTest/zipping-unnest.test        | 206 ++++++++++
 .../QueryTest/zipping-unnest-from-view.test        | 428 ++++++++++++++++++++-
 .../QueryTest/zipping-unnest-in-select-list.test   |  36 +-
 19 files changed, 945 insertions(+), 49 deletions(-)

diff --git a/be/src/exec/subplan-node.cc b/be/src/exec/subplan-node.cc
index 26b7b4c93..c38ce35b2 100644
--- a/be/src/exec/subplan-node.cc
+++ b/be/src/exec/subplan-node.cc
@@ -44,7 +44,7 @@ Status SubplanPlanNode::SetContainingSubplan(
   } else {
     if (node->tnode_->node_type == TPlanNodeType::UNNEST_NODE) {
       UnnestPlanNode* unnest_node = reinterpret_cast<UnnestPlanNode*>(node);
-      RETURN_IF_ERROR(unnest_node->InitCollExpr(state));
+      RETURN_IF_ERROR(unnest_node->InitCollExprs(state));
     }
     int num_children = node->children_.size();
     for (int i = 0; i < num_children; ++i) {
diff --git a/be/src/exec/unnest-node.cc b/be/src/exec/unnest-node.cc
index a1f75f5bb..36f084d72 100644
--- a/be/src/exec/unnest-node.cc
+++ b/be/src/exec/unnest-node.cc
@@ -48,7 +48,7 @@ void UnnestPlanNode::Close() {
   PlanNode::Close();
 }
 
-Status UnnestPlanNode::InitCollExpr(FragmentState* state) {
+Status UnnestPlanNode::InitCollExprs(FragmentState* state) {
   DCHECK(containing_subplan_ != nullptr)
       << "set_containing_subplan() must have been called";
   const RowDescriptor& row_desc = *containing_subplan_->children_[0]->row_descriptor_;
diff --git a/be/src/exec/unnest-node.h b/be/src/exec/unnest-node.h
index deea0264d..fbe3194ee 100644
--- a/be/src/exec/unnest-node.h
+++ b/be/src/exec/unnest-node.h
@@ -33,7 +33,7 @@ class UnnestPlanNode : public PlanNode {
   virtual Status CreateExecNode(RuntimeState* state, ExecNode** node) const override;
   /// Initializes the expressions that produce the collections to be unnested.
   /// Called by the containing subplan plan-node.
-  Status InitCollExpr(FragmentState* state);
+  Status InitCollExprs(FragmentState* state);
 
   ~UnnestPlanNode(){}
 
diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index 5cdadbff1..5fcc742bb 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -513,15 +513,19 @@ public class Analyzer {
     public final Map<String, org.apache.kudu.client.KuduTable> kuduTables =
         new HashMap<>();
 
-    // This holds the tuple id's of the arrays that are given as a zipping unnest table
-    // ref.
-    public Set<TupleId> zippingUnnestTupleIds = new HashSet<>();
-
     // This holds the nullable side slot ids from the outer join's equi-join conjuncts
     // e.g. t1 left join t2 on t1.id = t2.id, the slot id of t2.id will be added to
     // this set.
     public Set<SlotId> ojNullableSlotsInEquiPreds = new HashSet<>();
 
+    // This holds the tuple id's of the arrays that are given as a zipping unnest table
+    // ref. If the table ref is originated from a view then also add the tuple IDs for the
+    // respective table refs from the view.
+    public Set<TupleId> zippingUnnestTupleIds = new HashSet<>();
+
+    // Shows how many zipping unnests were in the query;
+    public int numZippingUnnests = 0;
+
     public GlobalState(StmtTableCache stmtTableCache, TQueryCtx queryCtx,
         AuthorizationFactory authzFactory, AuthorizationContext authzCtx) {
       this.stmtTableCache = stmtTableCache;
@@ -725,10 +729,16 @@ public class Analyzer {
     }
   }
 
-  public boolean isRegisteredTableRef(TableRef ref) {
-    if (ref == null) return false;
-    String uniqueAlias = ref.getUniqueAlias();
-    return aliasMap_.containsKey(uniqueAlias);
+  /**
+   * Checks if a table ref has already been registered in this analyzer and returns it.
+   * Uses the unique alias from the table ref for the check. Returns null if the table
+   * ref has not been registered.
+   */
+  public TableRef getRegisteredTableRef(String uniqueAlias) {
+    if (uniqueAlias == null) return null;
+    TupleDescriptor tupleDesc = aliasMap_.get(uniqueAlias);
+    if (tupleDesc == null) return null;
+    return tableRefMap_.get(tupleDesc.getId());
   }
 
   /**
@@ -787,6 +797,10 @@ public class Analyzer {
     tableRefMap_.put(desc.getId(), ref);
   }
 
+  public void addAlias(String alias, TupleDescriptor desc) {
+    aliasMap_.put(alias, desc);
+  }
+
   /**
    * Resolves the given TableRef into a concrete BaseTableRef, ViewRef or
    * CollectionTableRef. Returns the new resolved table ref or the given table
@@ -998,6 +1012,11 @@ public class Analyzer {
 
   public void addZippingUnnestTupleId(CollectionTableRef tblRef) {
     Expr collExpr = tblRef.getCollectionExpr();
+    addZippingUnnestTupleId(collExpr);
+  }
+
+  public void addZippingUnnestTupleId(Expr collExpr) {
+    if (collExpr == null) return;
     if (!(collExpr instanceof SlotRef)) return;
     SlotRef slotCollExpr = (SlotRef)collExpr;
     SlotDescriptor collSlotDesc = slotCollExpr.getDesc();
@@ -1007,10 +1026,22 @@ public class Analyzer {
     globalState_.zippingUnnestTupleIds.add(collTupleDesc.getId());
   }
 
+  public void addZippingUnnestTupleId(TupleId tid) {
+    globalState_.zippingUnnestTupleIds.add(tid);
+  }
+
   public Set<TupleId> getZippingUnnestTupleIds() {
     return globalState_.zippingUnnestTupleIds;
   }
 
+  public void increaseZippingUnnestCount() {
+    ++globalState_.numZippingUnnests;
+  }
+
+  public int getNumZippingUnnests() {
+    return globalState_.numZippingUnnests;
+  }
+
   /**
    * Returns the descriptor of the given explicit or implicit table alias or null if no
    * such alias has been registered.
diff --git a/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java b/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java
index 962009ac1..f22bb2609 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java
@@ -100,6 +100,13 @@ public class CollectionTableRef extends TableRef {
     if (resolvedPath_.getRootDesc() != null) {
       sourceView = resolvedPath_.getRootDesc().getSourceView();
     }
+    if (sourceView != null && zippingUnnestType_ ==
+        ZippingUnnestType.FROM_CLAUSE_ZIPPING_UNNEST) {
+      String implicitAlias = rawPath_.get(rawPath_.size() - 1).toLowerCase();
+      analyzer.addZippingUnnestTupleId(analyzer.getDescriptor(implicitAlias).getId());
+      TableRef existingTableRef = analyzer.getRegisteredTableRef(getUniqueAlias());
+      existingTableRef.getDesc().setHidden(false);
+    }
     if (sourceView == null || inSelectList_) {
       desc_ = analyzer.registerTableRef(this);
       // Avoid polluting the namespace with collections that back arrays
@@ -195,6 +202,8 @@ public class CollectionTableRef extends TableRef {
 
   public Expr getCollectionExpr() { return collectionExpr_; };
 
+  @Override
+  public boolean isCollectionInSelectList() { return inSelectList_; }
   public void setInSelectList(boolean value) { inSelectList_ = value; }
 
   @Override
diff --git a/fe/src/main/java/org/apache/impala/analysis/FromClause.java b/fe/src/main/java/org/apache/impala/analysis/FromClause.java
index f91dbfc70..dbe60aaf7 100644
--- a/fe/src/main/java/org/apache/impala/analysis/FromClause.java
+++ b/fe/src/main/java/org/apache/impala/analysis/FromClause.java
@@ -105,6 +105,7 @@ public class FromClause extends StmtNode implements Iterable<TableRef> {
           }
           if (firstZippingUnnestRef == null) firstZippingUnnestRef = tblRef;
           analyzer.addZippingUnnestTupleId((CollectionTableRef)tblRef);
+          analyzer.increaseZippingUnnestCount();
         }
       }
     }
@@ -146,6 +147,9 @@ public class FromClause extends StmtNode implements Iterable<TableRef> {
   private void checkTopLevelComplexAcidScan(Analyzer analyzer,
       CollectionTableRef collRef) {
     if (collRef.getCollectionExpr() != null) return;
+    // Don't do any checks of the collection that came from a view as getTable() would
+    // return null in that case.
+    if (collRef.getTable() == null) return;
     if (!AcidUtils.isFullAcidTable(
         collRef.getTable().getMetaStoreTable().getParameters())) {
       return;
diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
index f6d5f7b5c..409235933 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
@@ -26,6 +26,7 @@ import java.util.Map;
 import java.util.Set;
 
 import org.apache.impala.analysis.Path.PathType;
+import org.apache.impala.analysis.TableRef.ZippingUnnestType;
 import org.apache.impala.authorization.Privilege;
 import org.apache.impala.catalog.ArrayType;
 import org.apache.impala.catalog.Column;
@@ -149,6 +150,8 @@ public class SelectStmt extends QueryStmt {
   public boolean hasHavingClause() { return havingClause_ != null; }
   public ExprSubstitutionMap getBaseTblSmap() { return baseTblSmap_; }
 
+  public void addToFromClause(TableRef ref) { fromClause_.add(ref); }
+
   /**
    * A simple limit statement has a limit but no order-by,
    * group-by, aggregates or analytic functions. Joins are
@@ -287,6 +290,8 @@ public class SelectStmt extends QueryStmt {
       analyzeWhereClause();
       createSortInfo(analyzer_);
 
+      setZippingUnnestSlotRefsFromViews();
+
       // Analyze aggregation-relevant components of the select block (Group By
       // clause, select list, Order By clause), substitute AVG with SUM/COUNT,
       // create the AggregationInfo, including the agg output tuple, and transform
@@ -446,29 +451,77 @@ public class SelectStmt extends QueryStmt {
             "WHERE clause must not contain analytic expressions: " + e.toSql());
       }
 
-      // Don't allow a WHERE conjunct on an array item that is part of a zipping unnest.
-      // In case there is only one zipping unnested array this restriction is not needed
-      // as the UNNEST node has to handle a single array and it's safe to do the filtering
-      // in the scanner.
-      Set<TupleId> zippingUnnestTupleIds = analyzer_.getZippingUnnestTupleIds();
-      if (zippingUnnestTupleIds.size() > 1) {
-        for (Expr expr : whereClause_.getChildren()) {
-          if (expr == null || !(expr instanceof SlotRef)) continue;
-          SlotRef slotRef = (SlotRef)expr;
-          for (TupleId tid : zippingUnnestTupleIds) {
-            TupleDescriptor collTupleDesc = analyzer_.getTupleDesc(tid);
-            // If there is no slot ref for the collection tuple then there is no need to
-            // check.
-            if (collTupleDesc.getSlots().size() == 0) continue;
-            Preconditions.checkState(collTupleDesc.getSlots().size() == 1);
-            if (slotRef.getDesc().equals(collTupleDesc.getSlots().get(0))) {
-              throw new AnalysisException("Not allowed to add a filter on an unnested " +
-                  "array under the same select statement: " + expr.toSql());
+      verifyZippingUnnestSlots();
+
+      analyzer_.registerConjuncts(whereClause_, false);
+    }
+
+    /**
+     * Don't allow a WHERE conjunct on an array item that is part of a zipping unnest.
+     * In case there is only one zipping unnested array this restriction is not needed
+     * as the UNNEST node has to handle a single array and it's safe to do the filtering
+     * in the scanner.
+     */
+    private void verifyZippingUnnestSlots() throws AnalysisException {
+      if (analyzer_.getNumZippingUnnests() <= 1) return;
+      List<TupleId> zippingUnnestTupleIds = Lists.newArrayList(
+          analyzer_.getZippingUnnestTupleIds());
+
+      List<SlotRef> slotRefsInWhereClause = new ArrayList<>();
+      whereClause_.collect(SlotRef.class, slotRefsInWhereClause);
+      for (SlotRef slotRef : slotRefsInWhereClause) {
+        if (slotRef.isBoundByTupleIds(zippingUnnestTupleIds)) {
+          throw new AnalysisException("Not allowed to add a filter on an unnested " +
+              "array under the same select statement: " + slotRef.toSql());
+        }
+      }
+    }
+
+    /**
+     * When zipping unnest is performed using the SQL standard compliant syntax (where
+     * the unnest is in the FROM clause) the SlotRefs used for the zipping unnest are not
+     * UnnestExprs as with the other approach (where zipping unnest is in the select list)
+     * but regular SlotRefs. As a result they have to be marked so that later on anything
+     * specific for zipping unnest could be executed for them.
+     * This function identifies the SlotRefs that are for zipping unnesting and sets a
+     * flag for them. Note, only marks the SlotRefs that are originated form a view.
+     */
+    private void setZippingUnnestSlotRefsFromViews() {
+      for (TableRef tblRef : fromClause_.getTableRefs()) {
+        if (!tblRef.isFromClauseZippingUnnest()) continue;
+        Preconditions.checkState(tblRef instanceof CollectionTableRef);
+        ExprSubstitutionMap exprSubMap = getBaseTableSMapFromTableRef(tblRef);
+        if (exprSubMap == null) continue;
+
+        for (SelectListItem item : selectList_.getItems()) {
+          if (item.isStar()) continue;
+          Expr itemExpr = item.getExpr();
+          List<SlotRef> slotRefs = new ArrayList<>();
+          itemExpr.collect(SlotRef.class, slotRefs);
+          for (SlotRef slotRef : slotRefs) {
+            Expr subbedExpr = exprSubMap.get(slotRef);
+            if (subbedExpr == null || !(subbedExpr instanceof SlotRef)) continue;
+            SlotRef subbedSlotRef = (SlotRef)subbedExpr;
+            CollectionTableRef collTblRef = (CollectionTableRef)tblRef;
+            SlotRef collectionSlotRef = (SlotRef)collTblRef.getCollectionExpr();
+            // Check if 'slotRef' is originated from 'collectionSlotRef'.
+            if (subbedSlotRef.getDesc().getParent().getId() ==
+                collectionSlotRef.getDesc().getItemTupleDesc().getId()) {
+              slotRef.setIsZippingUnnest(true);
             }
           }
         }
       }
-      analyzer_.registerConjuncts(whereClause_, false);
+    }
+
+    /**
+     * If 'tblRef' is originated from a view then returns the baseTblSmap from the view.
+     * Returns false otherwise.
+     */
+    private ExprSubstitutionMap getBaseTableSMapFromTableRef(TableRef tblRef) {
+      if (tblRef.getResolvedPath().getRootDesc() == null) return null;
+      if (tblRef.getResolvedPath().getRootDesc().getSourceView() == null) return null;
+      return tblRef.getResolvedPath().getRootDesc().getSourceView().getBaseTblSmap();
     }
 
     /**
@@ -522,7 +575,7 @@ public class SelectStmt extends QueryStmt {
           continue;
         // Don't push down the "is not empty" predicate for zipping unnests if there are
         // multiple zipping unnests in the FROM clause.
-        if (tblRef.isZippingUnnest() && analyzer_.getZippingUnnestTupleIds().size() > 1) {
+        if (tblRef.isZippingUnnest() && analyzer_.getNumZippingUnnests() > 1) {
           continue;
         }
         IsNotEmptyPredicate isNotEmptyPred =
diff --git a/fe/src/main/java/org/apache/impala/analysis/SlotRef.java b/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
index c1409b7b6..27438b897 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
@@ -23,7 +23,6 @@ import java.util.Set;
 
 import org.apache.impala.analysis.Path.PathType;
 import org.apache.impala.catalog.FeFsTable;
-import org.apache.impala.catalog.ColumnStats;
 import org.apache.impala.catalog.FeTable;
 import org.apache.impala.catalog.HdfsFileFormat;
 import org.apache.impala.catalog.StructField;
@@ -43,14 +42,19 @@ import com.google.common.base.Preconditions;
 
 public class SlotRef extends Expr {
   protected List<String> rawPath_;
-  private final String label_;  // printed in toSql()
+  protected final String label_;  // printed in toSql()
 
   // Results of analysis.
-  private SlotDescriptor desc_;
+  protected SlotDescriptor desc_;
 
   // The resolved path after resolving 'rawPath_'.
   protected Path resolvedPath_ = null;
 
+  // Indicates if this SlotRef is coming from zipping unnest where the unest is given in
+  // the FROM clause. Note, when the unnest in in the select list then an UnnestExpr would
+  // be used instead of a SlotRef.
+  protected boolean isZippingUnnest_ = false;
+
   public SlotRef(List<String> rawPath) {
     super();
     rawPath_ = rawPath;
@@ -103,6 +107,7 @@ public class SlotRef extends Expr {
     }
     label_ = other.label_;
     desc_ = other.desc_;
+    isZippingUnnest_ = other.isZippingUnnest_;
   }
 
   /**
@@ -300,6 +305,8 @@ public class SlotRef extends Expr {
     return desc_.getPath();
   }
 
+  public void setIsZippingUnnest(boolean b) { isZippingUnnest_ = b; }
+
   @Override
   public String toSqlImpl(ToSqlOptions options) {
     if (label_ != null) return label_;
@@ -366,6 +373,15 @@ public class SlotRef extends Expr {
   @Override
   public boolean isBoundByTupleIds(List<TupleId> tids) {
     Preconditions.checkState(desc_ != null);
+    // If this SlotRef is coming from zipping unnest then try to do a similar check as
+    // UnnestExpr does.
+    if (isZippingUnnest_ && desc_.getParent() != null &&
+        desc_.getParent().getRootDesc() != null) {
+      TupleId parentId = desc_.getParent().getRootDesc().getId();
+      for (TupleId tid: tids) {
+        if (tid.equals(parentId)) return true;
+      }
+    }
     for (TupleId tid: tids) {
       if (tid.equals(desc_.getParent().getId())) return true;
     }
diff --git a/fe/src/main/java/org/apache/impala/analysis/TableRef.java b/fe/src/main/java/org/apache/impala/analysis/TableRef.java
index 0a9b9f358..ed3d813df 100644
--- a/fe/src/main/java/org/apache/impala/analysis/TableRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/TableRef.java
@@ -403,6 +403,9 @@ public class TableRef extends StmtNode {
   public boolean isAnalyzed() { return isAnalyzed_; }
   public boolean isResolved() { return !getClass().equals(TableRef.class); }
 
+  public boolean isFromClauseZippingUnnest() {
+    return zippingUnnestType_ == ZippingUnnestType.FROM_CLAUSE_ZIPPING_UNNEST;
+  }
   public boolean isZippingUnnest() {
     return zippingUnnestType_ != ZippingUnnestType.NONE;
   }
diff --git a/fe/src/main/java/org/apache/impala/analysis/UnnestExpr.java b/fe/src/main/java/org/apache/impala/analysis/UnnestExpr.java
index 6d63ff358..ec3355077 100644
--- a/fe/src/main/java/org/apache/impala/analysis/UnnestExpr.java
+++ b/fe/src/main/java/org/apache/impala/analysis/UnnestExpr.java
@@ -22,7 +22,6 @@ import org.apache.impala.analysis.TableRef.ZippingUnnestType;
 import org.apache.impala.catalog.TableLoadingException;
 import org.apache.impala.catalog.Type;
 import org.apache.impala.common.AnalysisException;
-import org.apache.impala.thrift.TExprNode;
 
 import com.google.common.base.Preconditions;
 import java.util.ArrayList;
@@ -40,6 +39,15 @@ public class UnnestExpr extends SlotRef {
 
   protected UnnestExpr(UnnestExpr other) {
     super(other);
+    // Removing "item" from the end of the path is required so that re-analyze will work
+    // as well after adding "item" to the path in the first round of analysis here in the
+    // unnest.
+    removeItemFromPath();
+    rawPathWithoutItem_ = other.rawPathWithoutItem_;
+  }
+
+  protected UnnestExpr(SlotDescriptor desc) {
+    super(desc);
   }
 
   @Override
@@ -54,11 +62,13 @@ public class UnnestExpr extends SlotRef {
     // find the corresponding CollectionTableRef during resolution.
     Path resolvedPath = resolveAndVerifyRawPath(analyzer);
     Preconditions.checkNotNull(resolvedPath);
+    if (!rawPathWithoutItem_.isEmpty()) rawPathWithoutItem_.clear();
     rawPathWithoutItem_.addAll(rawPath_);
 
     List<String> tableRefRawPath = constructRawPathForTableRef(resolvedPath);
     Preconditions.checkNotNull(tableRefRawPath);
-    createAndRegisterCollectionTableRef(tableRefRawPath, analyzer);
+    CollectionTableRef tblRef = createAndRegisterCollectionTableRef(tableRefRawPath,
+        analyzer);
 
     // 'rawPath_' points to an array and we need a SlotRef to refer to the item of the
     // array. Hence, adding "item" to the end of the path.
@@ -69,12 +79,14 @@ public class UnnestExpr extends SlotRef {
       rawPath_ = rawPath_.subList(rawPath_.size() - 2, rawPath_.size());
     }
     super.analyzeImpl(analyzer);
+    Preconditions.checkState(tblRef.desc_.getSlots().size() == 1);
+    analyzer.addZippingUnnestTupleId(desc_.getParent().getId());
   }
 
   private void verifyTableRefs(Analyzer analyzer) throws AnalysisException {
     for (TableRef ref : analyzer.getTableRefs().values()) {
       if (ref instanceof CollectionTableRef) {
-        if (!ref.isZippingUnnest()) {
+        if (!ref.isZippingUnnest() && !ref.isCollectionInSelectList()) {
           throw new AnalysisException(
               "Providing zipping and joining unnests together is not supported.");
         } else if (ref.getZippingUnnestType() ==
@@ -119,22 +131,40 @@ public class UnnestExpr extends SlotRef {
     return tableRefRawPath;
   }
 
-  private void createAndRegisterCollectionTableRef(List<String> tableRefRawPath,
-      Analyzer analyzer) throws AnalysisException {
-    TableRef tblRef = new TableRef(tableRefRawPath, null);
+  private CollectionTableRef createAndRegisterCollectionTableRef(
+      List<String> tableRefRawPath, Analyzer analyzer) throws AnalysisException {
+    String alias = "";
+    if (rawPath_ != null && rawPath_.size() > 0) {
+      alias = rawPath_.get(rawPath_.size() - 1);
+    }
+    TableRef tblRef = new TableRef(tableRefRawPath, alias);
     tblRef = analyzer.resolveTableRef(tblRef);
     Preconditions.checkState(tblRef instanceof CollectionTableRef);
     tblRef.setZippingUnnestType(ZippingUnnestType.SELECT_LIST_ZIPPING_UNNEST);
-    if (!analyzer.isRegisteredTableRef(tblRef)) {
+    TableRef existingTblRef = analyzer.getRegisteredTableRef(tblRef.getUniqueAlias());
+    if (existingTblRef == null) {
       tblRef.analyze(analyzer);
-      // This just registers the tbl ref to be added to the FROM clause because it's not
-      // available here. Note, SelectStmt will add it to the FROM clause during analysis.
+      // This just registers the table ref in the analyzer to be added to the FROM clause
+      // because the FROM clause is not available here. Note, a query rewrite will add it
+      // eventually before a re-analysis.
       analyzer.addTableRefFromUnnestExpr((CollectionTableRef)tblRef);
+      return (CollectionTableRef)tblRef;
+    } else if (existingTblRef.isCollectionInSelectList() ||
+        existingTblRef.getZippingUnnestType() == ZippingUnnestType.NONE) {
+      Preconditions.checkState(existingTblRef instanceof CollectionTableRef);
+      // This case happens when unnesting an array that comes from a view where the array
+      // is present in the select list of the view. Here the view has already registered
+      // the table ref to the analyzer.
+      existingTblRef.setZippingUnnestType(ZippingUnnestType.SELECT_LIST_ZIPPING_UNNEST);
+      analyzer.addTableRefFromUnnestExpr((CollectionTableRef)existingTblRef);
     }
+    existingTblRef.setHidden(false);
+    existingTblRef.getDesc().setHidden(false);
+    return (CollectionTableRef)existingTblRef;
   }
 
   private void removeItemFromPath() {
-    Preconditions.checkNotNull(rawPath_);
+    if (rawPath_ == null || rawPath_.isEmpty()) return;
     if (rawPath_.get(rawPath_.size() - 1).equals("item")) {
       rawPath_.remove(rawPath_.size() - 1);
     }
@@ -145,6 +175,39 @@ public class UnnestExpr extends SlotRef {
 
   @Override
   public String toSqlImpl(ToSqlOptions options) {
-    return "UNNEST(" + ToSqlUtils.getPathSql(rawPathWithoutItem_)  + ")";
+    Preconditions.checkState(isAnalyzed());
+    String label = "";
+    if (rawPathWithoutItem_ != null) label = ToSqlUtils.getPathSql(rawPathWithoutItem_);
+    if (label.equals("") && label_ != null) {
+      // If 'rawPathWithoutItem_' is null then this slot is from a view and we have to
+      // use 'label_' for displaying purposes.
+      if (label_.endsWith(".item")) label = label_.substring(0, label_.length() - 5);
+    }
+    return "UNNEST(" + label  + ")";
+  }
+
+  @Override
+  protected Expr substituteImpl(ExprSubstitutionMap smap, Analyzer analyzer) {
+    if (smap == null) return this;
+    SlotRef slotRef = new SlotRef(this.desc_);
+    Expr substExpr = smap.get(slotRef);
+    if (substExpr == null) {
+      UnnestExpr unnestExpr = new UnnestExpr(this.desc_);
+      substExpr = smap.get(unnestExpr);
+      if (substExpr == null) return this;
+      return substExpr;
+    }
+
+    return new UnnestExpr(((SlotRef)substExpr).getDesc());
+  }
+
+  @Override
+  public boolean isBoundByTupleIds(List<TupleId> tids) {
+    Preconditions.checkState(desc_ != null);
+    TupleId parentId = desc_.getParent().getRootDesc().getId();
+    for (TupleId tid: tids) {
+      if (tid.equals(parentId)) return true;
+    }
+    return super.isBoundByTupleIds(tids);
   }
 }
\ No newline at end of file
diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
index 9d33158f1..a5dba3831 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
@@ -948,8 +948,8 @@ public class HdfsScanNode extends ScanNode {
       // FROM clause then avoid pushing down conjunct for this slot to the scanner as it
       // would result incorrect results on that slot after performing the unnest.
       // One exception is when there is only one such table reference in the FROM clause.
-      Set<TupleId> zippingUnnestTupleIds = analyzer.getZippingUnnestTupleIds();
-      if (zippingUnnestTupleIds.size() > 1 && zippingUnnestTupleIds.contains(itemTid)) {
+      if (analyzer.getNumZippingUnnests() > 1 &&
+          analyzer.getZippingUnnestTupleIds().contains(itemTid)) {
         continue;
       }
 
diff --git a/fe/src/main/java/org/apache/impala/planner/PlanNode.java b/fe/src/main/java/org/apache/impala/planner/PlanNode.java
index e745ca675..b59acedc3 100644
--- a/fe/src/main/java/org/apache/impala/planner/PlanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/PlanNode.java
@@ -21,6 +21,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.Iterator;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
@@ -32,6 +33,7 @@ import org.apache.impala.analysis.Expr;
 import org.apache.impala.analysis.ExprId;
 import org.apache.impala.analysis.ExprSubstitutionMap;
 import org.apache.impala.analysis.SlotDescriptor;
+import org.apache.impala.analysis.SlotRef;
 import org.apache.impala.analysis.ToSqlOptions;
 import org.apache.impala.analysis.TupleDescriptor;
 import org.apache.impala.analysis.TupleId;
@@ -495,6 +497,30 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
     msg.num_children = numChildren;
   }
 
+  /**
+   * If there are more than one array to be zipping unnested in the 'analyzer' then
+   * removes the conjuncts related to the unnested array item from the 'conjuncts' list.
+   * This could be useful to prevent e.g. ScanNode or SingularRowSrc to pick up the
+   * conjuncts for zipping unnested arrays and let the UnnestNode to take care of them.
+   */
+  public static void removeZippingUnnestConjuncts(List<Expr> conjuncts,
+      Analyzer analyzer) {
+    Set<TupleId> zippingUnnestTupleIds = analyzer.getZippingUnnestTupleIds();
+
+    Iterator<Expr> it = conjuncts.iterator();
+    while(it.hasNext()) {
+      Expr e = it.next();
+      List<SlotRef> slotRefs = new ArrayList<>();
+      e.collect(SlotRef.class, slotRefs);
+      for (SlotRef slotRef : slotRefs) {
+        if (zippingUnnestTupleIds.contains(slotRef.getDesc().getParent().getId())) {
+          it.remove();
+          break;
+        }
+      }
+    }
+  }
+
   /**
    * Computes the full internal state, including smap and planner-relevant statistics
    * (calls computeStats()), marks all slots referenced by this node as materialized
@@ -515,10 +541,15 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
    */
   protected void assignConjuncts(Analyzer analyzer) {
     List<Expr> unassigned = analyzer.getUnassignedConjuncts(this);
+    if (!shouldPickUpZippingUnnestConjuncts()) {
+      removeZippingUnnestConjuncts(unassigned, analyzer);
+    }
     conjuncts_.addAll(unassigned);
     analyzer.markConjunctsAssigned(unassigned);
   }
 
+  protected boolean shouldPickUpZippingUnnestConjuncts() { return true; }
+
   /**
    * Apply the provided conjuncts to the this node, returning the new root of
    * the plan tree. Also add any slot equivalences for tupleIds that have not
diff --git a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
index 46ab74914..734885fbe 100644
--- a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
+++ b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
@@ -1849,6 +1849,8 @@ public class SingleNodePlanner {
 
     // Also add remaining unassigned conjuncts
     List<Expr> unassigned = analyzer.getUnassignedConjuncts(tid.asList());
+    PlanNode.removeZippingUnnestConjuncts(unassigned, analyzer);
+
     conjuncts.addAll(unassigned);
     analyzer.markConjunctsAssigned(unassigned);
     analyzer.createEquivConjuncts(tid, conjuncts);
diff --git a/fe/src/main/java/org/apache/impala/planner/SingularRowSrcNode.java b/fe/src/main/java/org/apache/impala/planner/SingularRowSrcNode.java
index e9c498417..bef43b783 100644
--- a/fe/src/main/java/org/apache/impala/planner/SingularRowSrcNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/SingularRowSrcNode.java
@@ -88,4 +88,7 @@ public class SingularRowSrcNode extends PlanNode {
   protected void toThrift(TPlanNode msg) {
     msg.node_type = TPlanNodeType.SINGULAR_ROW_SRC_NODE;
   }
+
+  @Override
+  protected boolean shouldPickUpZippingUnnestConjuncts() { return false; }
 }
diff --git a/fe/src/main/java/org/apache/impala/planner/UnnestNode.java b/fe/src/main/java/org/apache/impala/planner/UnnestNode.java
index 3aa7e8641..e5247af0f 100644
--- a/fe/src/main/java/org/apache/impala/planner/UnnestNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/UnnestNode.java
@@ -17,12 +17,14 @@
 
 package org.apache.impala.planner;
 
+import java.util.Comparator;
 import java.util.List;
 
 import org.apache.impala.analysis.Analyzer;
 import org.apache.impala.analysis.CollectionTableRef;
 import org.apache.impala.analysis.Expr;
 import org.apache.impala.analysis.SlotRef;
+import org.apache.impala.analysis.ToSqlUtils;
 import org.apache.impala.common.ImpalaException;
 import org.apache.impala.thrift.TExplainLevel;
 import org.apache.impala.thrift.TPlanNode;
@@ -120,6 +122,11 @@ public class UnnestNode extends PlanNode {
   protected String getDisplayLabelDetail() {
     StringBuilder strBuilder = new StringBuilder();
     boolean first = true;
+    tblRefs_.sort( (CollectionTableRef t1, CollectionTableRef t2) -> {
+      String path1 = ToSqlUtils.getPathSql(t1.getPath());
+      String path2 = ToSqlUtils.getPathSql(t2.getPath());
+      return path1.compareTo(path2);
+    });
     for (CollectionTableRef tblRef : tblRefs_) {
       if (!first) strBuilder.append(", ");
       strBuilder.append(Joiner.on(".").join(tblRef.getPath()));
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index 2b8bccd21..fb395ea61 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -351,6 +351,14 @@ public class PlannerTest extends PlannerTestBase {
     runPlannerTestFile("complex-types-file-formats");
   }
 
+  @Test
+  public void testZippingUnnest() {
+    addTestDb("test_zipping_unnest_db", "For creating views for zipping unnest queries.");
+    addTestView("create view test_zipping_unnest_db.view_arrays as " +
+        "select id, arr1, arr2 from functional_parquet.complextypes_arrays");
+    runPlannerTestFile("zipping-unnest");
+  }
+
   @Test
   public void testJoins() {
     TQueryOptions options = defaultQueryOptions();
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/zipping-unnest.test b/testdata/workloads/functional-planner/queries/PlannerTest/zipping-unnest.test
new file mode 100644
index 000000000..a9672637d
--- /dev/null
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/zipping-unnest.test
@@ -0,0 +1,206 @@
+# Check that for a single unnest the predicate on the unnested items is pushed to the
+# Scan node.
+select id, item from (
+    select id, unnest(arr1) from functional_parquet.complextypes_arrays) x
+where item < 5;
+---- PLAN
+PLAN-ROOT SINK
+|
+01:SUBPLAN
+|  row-size=20B cardinality=13.51K
+|
+|--04:NESTED LOOP JOIN [CROSS JOIN]
+|  |  row-size=20B cardinality=10
+|  |
+|  |--02:SINGULAR ROW SRC
+|  |     row-size=16B cardinality=1
+|  |
+|  03:UNNEST [functional_parquet.complextypes_arrays.arr1 arr1]
+|     row-size=4B cardinality=10
+|
+00:SCAN HDFS [functional_parquet.complextypes_arrays]
+   HDFS partitions=1/1 files=1 size=1.06KB
+   predicates: !empty(functional_parquet.complextypes_arrays.arr1)
+   predicates on arr1: UNNEST(arr1) < 5, arr1.item < 5
+   row-size=16B cardinality=1.35K
+====
+# Similar as above but using the FROM clause syntax for zipping unnest.
+select id, unnest1 from (
+    select id, arr1.item as unnest1
+    from functional_parquet.complextypes_arrays a, unnest(a.arr1)) x
+where unnest1 < 5;
+---- PLAN
+PLAN-ROOT SINK
+|
+01:SUBPLAN
+|  row-size=20B cardinality=13.51K
+|
+|--04:NESTED LOOP JOIN [CROSS JOIN]
+|  |  row-size=20B cardinality=10
+|  |
+|  |--02:SINGULAR ROW SRC
+|  |     row-size=16B cardinality=1
+|  |
+|  03:UNNEST [a.arr1]
+|     row-size=4B cardinality=10
+|
+00:SCAN HDFS [functional_parquet.complextypes_arrays a]
+   HDFS partitions=1/1 files=1 size=1.06KB
+   predicates: !empty(a.arr1)
+   predicates on a.arr1: arr1.item < 5
+   row-size=16B cardinality=1.35K
+====
+# Check that for a single unnest the predicate on the unnested items is pushed to the
+# Scan node. Queries a view instead of a table.
+select id, item from (
+    select id, unnest(arr1) from test_zipping_unnest_db.view_arrays) x
+where item < 5;
+---- PLAN
+PLAN-ROOT SINK
+|
+01:SUBPLAN
+|  row-size=20B cardinality=13.51K
+|
+|--04:NESTED LOOP JOIN [CROSS JOIN]
+|  |  row-size=20B cardinality=10
+|  |
+|  |--02:SINGULAR ROW SRC
+|  |     row-size=16B cardinality=1
+|  |
+|  03:UNNEST [test_zipping_unnest_db.view_arrays.arr1]
+|     row-size=4B cardinality=10
+|
+00:SCAN HDFS [functional_parquet.complextypes_arrays]
+   HDFS partitions=1/1 files=1 size=1.06KB
+   predicates: !empty(arr1)
+   predicates on arr1: UNNEST(arr1) < 5, arr1.item < 5
+   row-size=16B cardinality=1.35K
+====
+# Similar as above but using the FROM clause syntax for zipping unnest.
+select id, unnest1 from (
+    select id, arr1.item as unnest1
+    from test_zipping_unnest_db.view_arrays a, unnest(a.arr1)) x
+where unnest1 < 5;
+---- PLAN
+PLAN-ROOT SINK
+|
+01:SUBPLAN
+|  row-size=20B cardinality=13.51K
+|
+|--04:NESTED LOOP JOIN [CROSS JOIN]
+|  |  row-size=20B cardinality=10
+|  |
+|  |--02:SINGULAR ROW SRC
+|  |     row-size=16B cardinality=1
+|  |
+|  03:UNNEST [a.arr1]
+|     row-size=4B cardinality=10
+|
+00:SCAN HDFS [functional_parquet.complextypes_arrays]
+   HDFS partitions=1/1 files=1 size=1.06KB
+   predicates: !empty(arr1)
+   predicates on arr1: arr1.item < 5
+   row-size=16B cardinality=1.35K
+====
+# Check that for multiple unnests the predicate on the unnested items is pushed to the
+# Unnest node instead of the Scan node.
+select id, unnest1 from (
+    select id, unnest(arr1) unnest1, unnest(arr2) unnest2
+    from functional_parquet.complextypes_arrays) x
+where unnest1 < 5;
+---- PLAN
+PLAN-ROOT SINK
+|
+01:SUBPLAN
+|  row-size=20B cardinality=13.51K
+|
+|--04:NESTED LOOP JOIN [CROSS JOIN]
+|  |  row-size=20B cardinality=10
+|  |
+|  |--02:SINGULAR ROW SRC
+|  |     row-size=16B cardinality=1
+|  |
+|  03:UNNEST [functional_parquet.complextypes_arrays.arr1 arr1, functional_parquet.complextypes_arrays.arr2 arr2]
+|     predicates: UNNEST(arr1) < 5
+|     row-size=4B cardinality=10
+|
+00:SCAN HDFS [functional_parquet.complextypes_arrays]
+   HDFS partitions=1/1 files=1 size=1.06KB
+   row-size=16B cardinality=1.35K
+====
+# Similar as above but using the FROM clause syntax for zipping unnest.
+select id, unnest1 from (
+    select id, arr1.item as unnest1, arr2.item as unnest2
+    from functional_parquet.complextypes_arrays a, unnest(a.arr1, a.arr2)) x
+where unnest1 < 5;
+---- PLAN
+PLAN-ROOT SINK
+|
+01:SUBPLAN
+|  row-size=20B cardinality=13.51K
+|
+|--04:NESTED LOOP JOIN [CROSS JOIN]
+|  |  row-size=20B cardinality=10
+|  |
+|  |--02:SINGULAR ROW SRC
+|  |     row-size=16B cardinality=1
+|  |
+|  03:UNNEST [a.arr1, a.arr2]
+|     predicates: arr1.item < 5
+|     row-size=4B cardinality=10
+|
+00:SCAN HDFS [functional_parquet.complextypes_arrays a]
+   HDFS partitions=1/1 files=1 size=1.06KB
+   row-size=16B cardinality=1.35K
+====
+# Check that for multiple unnests the predicate on the unnested items is pushed to the
+# Unnest node instead of the Scan node. Queries a view instead of a table.
+select id, unnest1 from (
+    select id, unnest(arr1) unnest1, unnest(arr2) unnest2
+    from test_zipping_unnest_db.view_arrays) x
+where unnest1 < 5;
+---- PLAN
+PLAN-ROOT SINK
+|
+01:SUBPLAN
+|  row-size=20B cardinality=13.51K
+|
+|--04:NESTED LOOP JOIN [CROSS JOIN]
+|  |  row-size=20B cardinality=10
+|  |
+|  |--02:SINGULAR ROW SRC
+|  |     row-size=16B cardinality=1
+|  |
+|  03:UNNEST [test_zipping_unnest_db.view_arrays.arr1, test_zipping_unnest_db.view_arrays.arr2]
+|     predicates: UNNEST(arr1) < 5
+|     row-size=4B cardinality=10
+|
+00:SCAN HDFS [functional_parquet.complextypes_arrays]
+   HDFS partitions=1/1 files=1 size=1.06KB
+   row-size=16B cardinality=1.35K
+====
+# Similar as above but using the FROM clause syntax for zipping unnest.
+select id, unnest1 from (
+    select id, arr1.item as unnest1, arr2.item as unnest2
+    from test_zipping_unnest_db.view_arrays a, unnest(a.arr1, a.arr2)) x
+where unnest1 < 5;
+---- PLAN
+PLAN-ROOT SINK
+|
+01:SUBPLAN
+|  row-size=20B cardinality=13.51K
+|
+|--04:NESTED LOOP JOIN [CROSS JOIN]
+|  |  row-size=20B cardinality=10
+|  |
+|  |--02:SINGULAR ROW SRC
+|  |     row-size=16B cardinality=1
+|  |
+|  03:UNNEST [a.arr1, a.arr2]
+|     predicates: arr1.item < 5
+|     row-size=4B cardinality=10
+|
+00:SCAN HDFS [functional_parquet.complextypes_arrays]
+   HDFS partitions=1/1 files=1 size=1.06KB
+   row-size=16B cardinality=1.35K
+====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-from-view.test b/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-from-view.test
index 63cd79c8b..0456db078 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-from-view.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-from-view.test
@@ -1,5 +1,156 @@
 ====
 ---- QUERY
+create view view_arrays as
+    select id, arr1, arr2 from functional_parquet.complextypes_arrays;
+---- RESULTS
+'View has been created.'
+====
+---- QUERY
+# Unnest arrays from a view where there are no filters.
+select id, unnest(arr1), unnest(arr2) from view_arrays;
+---- RESULTS
+1,1,'one'
+1,2,'two'
+1,3,'three'
+1,4,'four'
+1,5,'five'
+2,1,'one'
+2,NULL,'two'
+2,3,'three'
+2,4,'NULL'
+2,5,'five'
+3,10,'ten'
+3,9,'NULL'
+3,8,'NULL'
+4,10,'ten'
+4,NULL,'nine'
+4,NULL,'eight'
+5,10,'ten'
+5,NULL,'eleven'
+5,12,'twelve'
+5,NULL,'thirteen'
+6,NULL,'str1'
+6,NULL,'str2'
+7,1,'NULL'
+7,2,'NULL'
+9,NULL,'str1'
+9,NULL,'str2'
+10,1,'NULL'
+10,2,'NULL'
+10,3,'NULL'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Unnest arrays from a view where there is a filter on an outer SELECT.
+# Note, filtering by the unnested items has to happen on the array's item.
+select id, unnest1, unnest2 from (
+    select id, unnest(arr1) as unnest1, unnest(arr2) as unnest2 from view_arrays) x
+where unnest1 < 3;
+---- RESULTS
+1,1,'one'
+1,2,'two'
+2,1,'one'
+7,1,'NULL'
+7,2,'NULL'
+10,1,'NULL'
+10,2,'NULL'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Similar as above but here the unnested field to be filtered by doesn't have an alias.
+select id, unnest1, item from (
+    select id, unnest(arr1) as unnest1, unnest(arr2) from view_arrays) x
+where item is null;
+---- RESULTS
+2,4,'NULL'
+3,9,'NULL'
+3,8,'NULL'
+7,1,'NULL'
+7,2,'NULL'
+10,1,'NULL'
+10,2,'NULL'
+10,3,'NULL'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Giving a filter on an unnested array is allowed if there is only one unnest in the
+# query.
+select id, unnest(arr1) from view_arrays
+where arr1.item > 5;
+---- RESULTS
+3,10
+3,9
+3,8
+4,10
+5,10
+5,12
+---- TYPES
+INT,INT
+====
+---- QUERY
+# Giving a filter on an unnested array is not allowed if there are multiple unnests in the
+# query.
+select id, unnest(arr1), unnest(arr2) from view_arrays
+where arr1.item > 5;
+---- CATCH
+AnalysisException: Not allowed to add a filter on an unnested array under the same select statement: arr1.item
+====
+---- QUERY
+create view view_arrays_with_filter as
+    select id, arr1, arr2 from functional_orc_def.complextypes_arrays where id < 5;
+---- RESULTS
+'View has been created.'
+====
+---- QUERY
+# Unnest arrays from a view where there is a filter within the view.
+select id, unnest(arr1), unnest(arr2) from view_arrays_with_filter;
+---- RESULTS
+1,1,'one'
+1,2,'two'
+1,3,'three'
+1,4,'four'
+1,5,'five'
+2,1,'one'
+2,NULL,'two'
+2,3,'three'
+2,4,'NULL'
+2,5,'five'
+3,10,'ten'
+3,9,'NULL'
+3,8,'NULL'
+4,10,'ten'
+4,NULL,'nine'
+4,NULL,'eight'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Unnest arrays from a view where there are filters both within the view and on the outer
+# select.
+select id, unnest1, unnest2 from (
+    select id, unnest(arr1) unnest1, unnest(arr2) unnest2 from view_arrays_with_filter) x
+where unnest2 is not null;
+---- RESULTS
+1,1,'one'
+1,2,'two'
+1,3,'three'
+1,4,'four'
+1,5,'five'
+2,1,'one'
+2,NULL,'two'
+2,3,'three'
+2,5,'five'
+3,10,'ten'
+4,10,'ten'
+4,NULL,'nine'
+4,NULL,'eight'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
 create view view_unnested_arrays as
     select id, unnest(arr1) as arr1_unnested, unnest(arr2) as arr2_unnested
     from functional_orc_def.complextypes_arrays;
@@ -45,12 +196,287 @@ INT,INT,STRING
 ---- QUERY
 # Same as above but there is a filter in the outer select.
 select id, arr1_unnested, arr2_unnested from view_unnested_arrays
-where arr1_unnested > 5;
+where arr1_unnested >= 5 and length(arr2_unnested) > 3;
+---- RESULTS
+1,5,'five'
+2,5,'five'
+5,12,'twelve'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Create a wrapper view around an existing view that in turn contains arrays.
+# Adds an additional filter on the underlying view.
+create view nested_view_arrays as
+    select id, arr1, arr2 from view_arrays
+    where id % 2 = 1;
+---- RESULTS
+'View has been created.'
+====
+---- QUERY
+# Do an unnest where the array is in a nested view.
+select id, unnest(arr1), unnest(arr2) from nested_view_arrays;
+---- RESULTS
+1,1,'one'
+1,2,'two'
+1,3,'three'
+1,4,'four'
+1,5,'five'
+3,10,'ten'
+3,9,'NULL'
+3,8,'NULL'
+5,10,'ten'
+5,NULL,'eleven'
+5,12,'twelve'
+5,NULL,'thirteen'
+7,1,'NULL'
+7,2,'NULL'
+9,NULL,'str1'
+9,NULL,'str2'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# In this query the array has different unique aliases in the different levels:
+# 'x.arr1', 'arr1'
+select id, unnest(arr1) unnest1, unnest(arr2) from (
+    select id, arr1, arr2 from nested_view_arrays where id < 5) x;
 ---- RESULTS
+1,1,'one'
+1,2,'two'
+1,3,'three'
+1,4,'four'
+1,5,'five'
+3,10,'ten'
+3,9,'NULL'
+3,8,'NULL'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Similar as above but apply an additional filter on the select. The filter is on a
+# non-unnested column.
+select id, unnest(arr1), unnest(arr2) from nested_view_arrays
+where id > 4;
+---- RESULTS
+5,10,'ten'
+5,NULL,'eleven'
+5,12,'twelve'
+5,NULL,'thirteen'
+7,1,'NULL'
+7,2,'NULL'
+9,NULL,'str1'
+9,NULL,'str2'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Similar as above but here the filter is on a column that is being unnested in the same
+# select. This is not allowed.
+select id, unnest(arr1), unnest(arr2) from nested_view_arrays
+where arr1.item = 4;
+---- CATCH
+AnalysisException: Not allowed to add a filter on an unnested array under the same select statement: arr1.item
+====
+---- QUERY
+# Similar as above but here the filter is in an outer select.
+select id, unnest1, unnest2 from (
+    select id, unnest(arr1) unnest1, unnest(arr2) unnest2 from nested_view_arrays) x
+where unnest1 = 4;
+---- RESULTS
+1,4,'four'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Create a wrapper view around an existing view that in turn contains arrays.
+# This wrapper view unnests the arrays from the underlying view.
+create view nested_view_unnested_arrays as
+    select id, unnest(arr1) as arr1_unnested, unnest(arr2) as arr2_unnested
+    from view_arrays
+    where id % 2 = 1;
+---- RESULTS
+'View has been created.'
+====
+---- QUERY
+# Query the nested view where one view gives the arrays and the other unnests them.
+select id, arr1_unnested, arr2_unnested from nested_view_unnested_arrays;
+---- RESULTS
+1,1,'one'
+1,2,'two'
+1,3,'three'
+1,4,'four'
+1,5,'five'
+3,10,'ten'
+3,9,'NULL'
+3,8,'NULL'
+5,10,'ten'
+5,NULL,'eleven'
+5,12,'twelve'
+5,NULL,'thirteen'
+7,1,'NULL'
+7,2,'NULL'
+9,NULL,'str1'
+9,NULL,'str2'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Similar as above but with a where filter on the outermost select.
+select id, arr1_unnested, arr2_unnested from nested_view_unnested_arrays
+where arr1_unnested > 7;
+---- RESULTS
+3,10,'ten'
+3,9,'NULL'
+3,8,'NULL'
+5,10,'ten'
+5,12,'twelve'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# The queries below will test the syntax where the unnest is in the from clause.
+# WHERE filter is not allowed when multiple arrays are being unnested in the same SELECT
+# statement.
+select id, arr1.item from
+    view_arrays va, unnest(va.arr1, va.arr2)
+where arr1.item < 3;
+---- CATCH
+AnalysisException: Not allowed to add a filter on an unnested array under the same select statement: arr1.item
+====
+---- QUERY
+# Similar as above but here only one array is unnested, hence the WHERE filter is allowed.
+select id, arr1.item from
+    view_arrays va, unnest(va.arr1)
+where arr1.item < 3;
+---- RESULTS
+1,1
+1,2
+2,1
+7,1
+7,2
+10,1
+10,2
+---- TYPES
+INT,INT
+====
+---- QUERY
+# An outer select has a WHERE filter while an inner select unnests multiple arrays.
+select id, unnest1, unnest2 from
+    (select id, arr1.item as unnest1, arr2.item as unnest2 from
+        functional_parquet.complextypes_arrays va,
+        unnest(va.arr1, va.arr2)) x
+where unnest1 < 3;
+---- RESULTS
+1,1,'one'
+1,2,'two'
+2,1,'one'
+7,1,'NULL'
+7,2,'NULL'
+10,1,'NULL'
+10,2,'NULL'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Unnest arrays from a view where there is a filter within the view.
+select id, arr1.item, arr2.item from view_arrays_with_filter va, unnest(va.arr1, va.arr2);
+---- RESULTS
+1,1,'one'
+1,2,'two'
+1,3,'three'
+1,4,'four'
+1,5,'five'
+2,1,'one'
+2,NULL,'two'
+2,3,'three'
+2,4,'NULL'
+2,5,'five'
 3,10,'ten'
 3,9,'NULL'
 3,8,'NULL'
 4,10,'ten'
+4,NULL,'nine'
+4,NULL,'eight'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+create view view_unnested_arrays2 as
+    select id, arr1.item as arr1_unnested, arr2.item as arr2_unnested
+    from functional_orc_def.complextypes_arrays va, unnest(va.arr1, va.arr2);
+---- RESULTS
+'View has been created.'
+====
+---- QUERY
+# Query from a view that does the unnesting itself. Apply a filter on top of the view.
+select id, arr1_unnested, arr2_unnested from view_unnested_arrays2
+where arr1_unnested >= 5 and length(arr2_unnested) > 3;
+---- RESULTS
+1,5,'five'
+2,5,'five'
+5,12,'twelve'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Unnest arrays that are coming from nested views.
+select id, arr1.item, arr2.item from nested_view_arrays va, unnest(va.arr1, va.arr2)
+where id > 4;
+---- RESULTS
+5,10,'ten'
+5,NULL,'eleven'
+5,12,'twelve'
+5,NULL,'thirteen'
+7,1,'NULL'
+7,2,'NULL'
+9,NULL,'str1'
+9,NULL,'str2'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Create a wrapper view around an existing view that in turn contains arrays.
+# This wrapper view unnests the arrays from the underlying view using the syntax where
+# the unnest is in the from clause.
+create view nested_view_unnested_arrays2 as
+    select id, arr1.item as arr1_unnested, arr2.item as arr2_unnested
+    from view_arrays va, unnest(va.arr1, va.arr2)
+    where id % 2 = 1;
+---- RESULTS
+'View has been created.'
+====
+---- QUERY
+# Query the nested view where one view gives the arrays and the other unnests them.
+select id, arr1_unnested, arr2_unnested from nested_view_unnested_arrays2;
+---- RESULTS
+1,1,'one'
+1,2,'two'
+1,3,'three'
+1,4,'four'
+1,5,'five'
+3,10,'ten'
+3,9,'NULL'
+3,8,'NULL'
+5,10,'ten'
+5,NULL,'eleven'
+5,12,'twelve'
+5,NULL,'thirteen'
+7,1,'NULL'
+7,2,'NULL'
+9,NULL,'str1'
+9,NULL,'str2'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Similar as above but with a where filter on the outermost select.
+select id, arr1_unnested, arr2_unnested from nested_view_unnested_arrays2
+where arr1_unnested > 7;
+---- RESULTS
+3,10,'ten'
+3,9,'NULL'
+3,8,'NULL'
 5,10,'ten'
 5,12,'twelve'
 ---- TYPES
diff --git a/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-select-list.test b/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-select-list.test
index 64a9621e7..2cbeea134 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-select-list.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-select-list.test
@@ -156,7 +156,7 @@ AnalysisException: Providing zipping and joining unnests together is not support
 ---- QUERY
 # Zipping and joining unnests are given together where a single table ref is in the FROM
 # clause with absolute path.
-select unnest(a) from complextypestbl.int_array_array;
+select unnest(item) from complextypestbl.int_array_array;
 ---- CATCH
 AnalysisException: Providing zipping and joining unnests together is not supported.
 ====
@@ -173,6 +173,24 @@ select unnest(arr1) from complextypes_arrays t, unnest(t.arr2);
 AnalysisException: Providing zipping unnest both in the SELECT list and in the FROM clause is not supported.
 ====
 ---- QUERY
+# WHERE filter on an unnested array in the same SELECT statement is not allowed.
+select id, unnest(arr1), unnest(arr2) from complextypes_arrays where arr1.item = 1;
+---- CATCH
+AnalysisException: Not allowed to add a filter on an unnested array under the same select statement: arr1.item
+====
+---- QUERY
+select id, unnest(arr1), unnest(arr2) from complextypes_arrays
+where id < 3 and length(arr2.item) < 5;
+---- CATCH
+AnalysisException: Not allowed to add a filter on an unnested array under the same select statement: arr2.item
+====
+---- QUERY
+select id, unnest(arr1), unnest(arr2) from complextypes_arrays
+where 3 * arr1.item > 8;
+---- CATCH
+AnalysisException: Not allowed to add a filter on an unnested array under the same select statement: arr1.item
+====
+---- QUERY
 # Do an unnest on the outer layer of a nested array.
 select unnest(int_array_array) from complextypestbl;
 ---- RESULTS
@@ -196,3 +214,19 @@ STRING
 select unnest(int_array_array.item) from complextypestbl;
 ---- CATCH
 AnalysisException: Illegal column/field reference 'int_array_array.item' with intermediate collection 'int_array_array' of type 'ARRAY<ARRAY<INT>>'
+====
+---- QUERY
+# Unnest the same array multiple times. Check that the where filter is still allowed even
+# though there are multiple unnest, but they are for the same array.
+select id, unnest(arr1), unnest(arr1) from complextypes_arrays
+where arr1.item < 3;
+---- RESULTS
+1,1,1
+1,2,2
+2,1,1
+7,1,1
+7,2,2
+10,1,1
+10,2,2
+---- TYPES
+INT,INT,INT

[impala] 02/02: IMPALA-11204: Template implementation for OrcStringColumnReader::ReadValue

Posted by st...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 85ddd27b640bf42a61d8af238938e16618db537e
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Wed Mar 30 10:47:39 2022 +0800

    IMPALA-11204: Template implementation for OrcStringColumnReader::ReadValue
    
    There are some checks in OrcStringColumnReader::ReadValue() that we can
    determine outside the scope of this method. They should be optimized
    since this is a critical method that will be executed for each row (and
    for each string column). With these checks, the method is too complex to
    be inlined in OrcBatchedReader::ReadValueBatch() by the compiler.
    
    This patch templates OrcStringColumnReader::ReadValue() with two
    parameters, one for the target slot type (i.e. STRING/CHAR/VARCHAR),
    and the other one for whether the column is dictionary encoded. Also
    adds an ALWAYS_INLINE marker to force inlining it.
    
    OrcStringColumnReader::ReadValueBatch() will call a template version of
    ReadValue() based on the slot type and the orc batch encoded state.
    
    Ran a single node perf test on TPCH(30) on my dev box using 3 impalad
    instances. There are some improvements and no significant regressions:
    +----------+--------+-------------+------------+
    | Query    | Avg(s) | Base Avg(s) | Delta(Avg) |
    +----------+--------+-------------+------------+
    | TPCH-Q19 | 5.62   | 6.07        | I -7.41%   |
    | TPCH-Q6  | 2.56   | 2.78        | I -7.77%   |
    | TPCH-Q4  | 3.85   | 4.25        | I -9.42%   |
    | TPCH-Q12 | 4.25   | 4.99        | I -14.78%  |
    +----------+--------+-------------+------------+
    Base commit: ff21728
    File Format: orc/snap/block
    Iterations: 30
    
    Change-Id: I5e5f88c28059fb3d3ac1172e6d383d06ee3bedd5
    Reviewed-on: http://gerrit.cloudera.org:8080/18366
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exec/orc-column-readers.cc | 78 +++++++++++++++++++++++++++++++++++++--
 be/src/exec/orc-column-readers.h  | 21 ++++++++++-
 2 files changed, 95 insertions(+), 4 deletions(-)

diff --git a/be/src/exec/orc-column-readers.cc b/be/src/exec/orc-column-readers.cc
index c2136f5fa..83b6e3d47 100644
--- a/be/src/exec/orc-column-readers.cc
+++ b/be/src/exec/orc-column-readers.cc
@@ -181,7 +181,77 @@ Status OrcStringColumnReader::InitBlob(orc::DataBuffer<char>* blob, MemPool* poo
   return Status::OK();
 }
 
+Status OrcStringColumnReader::ReadValueBatch(int row_idx,
+    ScratchTupleBatch* scratch_batch, MemPool* pool, int scratch_batch_idx) {
+  switch (slot_desc_->type().type) {
+    case TYPE_STRING:
+      if (batch_->isEncoded) {
+        return ReadValueBatchInternal<TYPE_STRING, true>(
+            row_idx, scratch_batch, scratch_batch_idx);
+      }
+      return ReadValueBatchInternal<TYPE_STRING, false>(
+          row_idx, scratch_batch, scratch_batch_idx);
+    case TYPE_CHAR:
+      if (batch_->isEncoded) {
+        return ReadValueBatchInternal<TYPE_CHAR, true>(
+            row_idx, scratch_batch, scratch_batch_idx);
+      }
+      return ReadValueBatchInternal<TYPE_CHAR, false>(
+          row_idx, scratch_batch, scratch_batch_idx);
+    case TYPE_VARCHAR:
+      if (batch_->isEncoded) {
+        return ReadValueBatchInternal<TYPE_VARCHAR, true>(
+            row_idx, scratch_batch, scratch_batch_idx);
+      }
+      return ReadValueBatchInternal<TYPE_VARCHAR, false>(
+          row_idx, scratch_batch, scratch_batch_idx);
+    default:
+      return Status("Illegal string type: " + TypeToString(slot_desc_->type().type));
+  }
+}
+
 Status OrcStringColumnReader::ReadValue(int row_idx, Tuple* tuple, MemPool* pool) {
+  switch (slot_desc_->type().type) {
+    case TYPE_STRING:
+      if (batch_->isEncoded) {
+        return ReadValueInternal<TYPE_STRING, true>(row_idx, tuple);
+      }
+      return ReadValueInternal<TYPE_STRING, false>(row_idx, tuple);
+    case TYPE_CHAR:
+      if (batch_->isEncoded) {
+        return ReadValueInternal<TYPE_CHAR, true>(row_idx, tuple);
+      }
+      return ReadValueInternal<TYPE_CHAR, false>(row_idx, tuple);
+    case TYPE_VARCHAR:
+      if (batch_->isEncoded) {
+        return ReadValueInternal<TYPE_VARCHAR, true>(row_idx, tuple);
+      }
+      return ReadValueInternal<TYPE_VARCHAR, false>(row_idx, tuple);
+    default:
+      return Status("Illegal string type: " + TypeToString(slot_desc_->type().type));
+  }
+}
+
+template<PrimitiveType SLOT_TYPE, bool ENCODED>
+Status OrcStringColumnReader::ReadValueBatchInternal(int row_idx,
+    ScratchTupleBatch* scratch_batch, int scratch_batch_idx) {
+  int num_to_read = std::min<int>(scratch_batch->capacity - scratch_batch_idx,
+      NumElements() - row_idx);
+  DCHECK_LE(row_idx + num_to_read, NumElements());
+  for (int i = 0; i < num_to_read; ++i) {
+    int scratch_batch_pos = i + scratch_batch_idx;
+    uint8_t* next_tuple = scratch_batch->tuple_mem +
+        scratch_batch_pos * OrcColumnReader::scanner_->tuple_byte_size();
+    Tuple* tuple = reinterpret_cast<Tuple*>(next_tuple);
+    Status s = ReadValueInternal<SLOT_TYPE, ENCODED>(row_idx + i, tuple);
+    RETURN_IF_ERROR(s);
+  }
+  scratch_batch->num_tuples = scratch_batch_idx + num_to_read;
+  return Status::OK();
+}
+
+template<PrimitiveType SLOT_TYPE, bool ENCODED>
+Status OrcStringColumnReader::ReadValueInternal(int row_idx, Tuple* tuple) {
   if (IsNull(DCHECK_NOTNULL(batch_), row_idx)) {
     SetNullSlot(tuple);
     return Status::OK();
@@ -189,7 +259,8 @@ Status OrcStringColumnReader::ReadValue(int row_idx, Tuple* tuple, MemPool* pool
   char* src_ptr;
   int src_len;
 
-  if (batch_->isEncoded) {
+  if (ENCODED) {
+    DCHECK(batch_->isEncoded);
     orc::EncodedStringVectorBatch* currentBatch =
         static_cast<orc::EncodedStringVectorBatch*>(batch_);
 
@@ -207,7 +278,8 @@ Status OrcStringColumnReader::ReadValue(int row_idx, Tuple* tuple, MemPool* pool
     src_len = batch_->length[row_idx];
   }
   int dst_len = slot_desc_->type().len;
-  if (slot_desc_->type().type == TYPE_CHAR) {
+  if (SLOT_TYPE == TYPE_CHAR) {
+    DCHECK_EQ(slot_desc_->type().type, TYPE_CHAR);
     int unpadded_len = min(dst_len, static_cast<int>(src_len));
     char* dst_char = reinterpret_cast<char*>(GetSlot(tuple));
     memcpy(dst_char, src_ptr, unpadded_len);
@@ -215,7 +287,7 @@ Status OrcStringColumnReader::ReadValue(int row_idx, Tuple* tuple, MemPool* pool
     return Status::OK();
   }
   StringValue* dst = reinterpret_cast<StringValue*>(GetSlot(tuple));
-  if (slot_desc_->type().type == TYPE_VARCHAR && src_len > dst_len) {
+  if (SLOT_TYPE == TYPE_VARCHAR && src_len > dst_len) {
     dst->len = dst_len;
   } else {
     dst->len = src_len;
diff --git a/be/src/exec/orc-column-readers.h b/be/src/exec/orc-column-readers.h
index 2dd5663c1..555607def 100644
--- a/be/src/exec/orc-column-readers.h
+++ b/be/src/exec/orc-column-readers.h
@@ -346,11 +346,30 @@ class OrcStringColumnReader : public OrcPrimitiveColumnReader<OrcStringColumnRea
     return Status::OK();
   }
 
-  Status ReadValue(int row_idx, Tuple* tuple, MemPool* pool) final WARN_UNUSED_RESULT;
+  /// Overrides the method to invoke templated ReadValueBatchInternal().
+  Status ReadValueBatch(int row_idx, ScratchTupleBatch* scratch_batch,
+      MemPool* pool, int scratch_batch_idx) override WARN_UNUSED_RESULT;
+
+  /// Overrides ReadValue() to invoke templated ReadValueBatchInternal().
+  /// It's only used in materializing collection (i.e. list/map) values.
+  Status ReadValue(int row_idx, Tuple* tuple, MemPool* pool) override WARN_UNUSED_RESULT;
 
  private:
   friend class OrcPrimitiveColumnReader<OrcStringColumnReader>;
 
+  /// Template implementation for ReadValueBatch() to avoid checks inside the inner loop.
+  /// When 'ENCODED' is true, the orc string vector batch is expected to be dictionary
+  /// encoded.
+  template<PrimitiveType SLOT_TYPE, bool ENCODED>
+  Status ReadValueBatchInternal(int row_idx, ScratchTupleBatch* scratch_batch,
+      int scratch_batch_idx) WARN_UNUSED_RESULT;
+
+  /// Template implementation for ReadValue(). We use this method instead of ReadValue()
+  /// in ReadValueBatch() so we can check types out of the loop.
+  template<PrimitiveType SLOT_TYPE, bool ENCODED>
+  ALWAYS_INLINE inline Status ReadValueInternal(int row_idx, Tuple* tuple)
+      WARN_UNUSED_RESULT;
+
   orc::StringVectorBatch* batch_ = nullptr;
   // We copy the blob from the batch, so the memory will be handled by Impala, and not
   // by the ORC lib.