You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/04/05 08:57:37 UTC
[impala] 01/02: IMPALA-11038: Zipping unnest from view
This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 32e2ace38df03b6a760d636154c0ca78304568de
Author: Gabor Kaszab <ga...@cloudera.com>
AuthorDate: Thu Nov 18 17:38:37 2021 +0100
IMPALA-11038: Zipping unnest from view
IMPALA-10920 introduced zipping unnest functionality for arrays that
are in a table. This patch improves that support further by accepting
inputs from views as well.
Testing:
- Added planner tests to verify which execution node handles the
predicates on unnested items.
- E2E tests for both unnesting syntaxes (select list and from clause)
to cover when the source of the unnested arrays is not a table but a
view. Also tested multi-level views and filtering the unnested items
on different levels.
Change-Id: I68f649dda9e41f257e7f6596193d07b24049f92a
Reviewed-on: http://gerrit.cloudera.org:8080/18094
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Gabor Kaszab <ga...@cloudera.com>
---
be/src/exec/subplan-node.cc | 2 +-
be/src/exec/unnest-node.cc | 2 +-
be/src/exec/unnest-node.h | 2 +-
.../java/org/apache/impala/analysis/Analyzer.java | 47 ++-
.../apache/impala/analysis/CollectionTableRef.java | 9 +
.../org/apache/impala/analysis/FromClause.java | 4 +
.../org/apache/impala/analysis/SelectStmt.java | 93 ++++-
.../java/org/apache/impala/analysis/SlotRef.java | 22 +-
.../java/org/apache/impala/analysis/TableRef.java | 3 +
.../org/apache/impala/analysis/UnnestExpr.java | 85 +++-
.../org/apache/impala/planner/HdfsScanNode.java | 4 +-
.../java/org/apache/impala/planner/PlanNode.java | 31 ++
.../apache/impala/planner/SingleNodePlanner.java | 2 +
.../apache/impala/planner/SingularRowSrcNode.java | 3 +
.../java/org/apache/impala/planner/UnnestNode.java | 7 +
.../org/apache/impala/planner/PlannerTest.java | 8 +
.../queries/PlannerTest/zipping-unnest.test | 206 ++++++++++
.../QueryTest/zipping-unnest-from-view.test | 428 ++++++++++++++++++++-
.../QueryTest/zipping-unnest-in-select-list.test | 36 +-
19 files changed, 945 insertions(+), 49 deletions(-)
diff --git a/be/src/exec/subplan-node.cc b/be/src/exec/subplan-node.cc
index 26b7b4c93..c38ce35b2 100644
--- a/be/src/exec/subplan-node.cc
+++ b/be/src/exec/subplan-node.cc
@@ -44,7 +44,7 @@ Status SubplanPlanNode::SetContainingSubplan(
} else {
if (node->tnode_->node_type == TPlanNodeType::UNNEST_NODE) {
UnnestPlanNode* unnest_node = reinterpret_cast<UnnestPlanNode*>(node);
- RETURN_IF_ERROR(unnest_node->InitCollExpr(state));
+ RETURN_IF_ERROR(unnest_node->InitCollExprs(state));
}
int num_children = node->children_.size();
for (int i = 0; i < num_children; ++i) {
diff --git a/be/src/exec/unnest-node.cc b/be/src/exec/unnest-node.cc
index a1f75f5bb..36f084d72 100644
--- a/be/src/exec/unnest-node.cc
+++ b/be/src/exec/unnest-node.cc
@@ -48,7 +48,7 @@ void UnnestPlanNode::Close() {
PlanNode::Close();
}
-Status UnnestPlanNode::InitCollExpr(FragmentState* state) {
+Status UnnestPlanNode::InitCollExprs(FragmentState* state) {
DCHECK(containing_subplan_ != nullptr)
<< "set_containing_subplan() must have been called";
const RowDescriptor& row_desc = *containing_subplan_->children_[0]->row_descriptor_;
diff --git a/be/src/exec/unnest-node.h b/be/src/exec/unnest-node.h
index deea0264d..fbe3194ee 100644
--- a/be/src/exec/unnest-node.h
+++ b/be/src/exec/unnest-node.h
@@ -33,7 +33,7 @@ class UnnestPlanNode : public PlanNode {
virtual Status CreateExecNode(RuntimeState* state, ExecNode** node) const override;
/// Initializes the expressions that produce the collections to be unnested.
/// Called by the containing subplan plan-node.
- Status InitCollExpr(FragmentState* state);
+ Status InitCollExprs(FragmentState* state);
~UnnestPlanNode(){}
diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index 5cdadbff1..5fcc742bb 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -513,15 +513,19 @@ public class Analyzer {
public final Map<String, org.apache.kudu.client.KuduTable> kuduTables =
new HashMap<>();
- // This holds the tuple id's of the arrays that are given as a zipping unnest table
- // ref.
- public Set<TupleId> zippingUnnestTupleIds = new HashSet<>();
-
// This holds the nullable side slot ids from the outer join's equi-join conjuncts
// e.g. t1 left join t2 on t1.id = t2.id, the slot id of t2.id will be added to
// this set.
public Set<SlotId> ojNullableSlotsInEquiPreds = new HashSet<>();
+ // This holds the tuple id's of the arrays that are given as a zipping unnest table
+ // ref. If the table ref is originated from a view then also add the tuple IDs for the
+ // respective table refs from the view.
+ public Set<TupleId> zippingUnnestTupleIds = new HashSet<>();
+
+ // Shows how many zipping unnests were in the query;
+ public int numZippingUnnests = 0;
+
public GlobalState(StmtTableCache stmtTableCache, TQueryCtx queryCtx,
AuthorizationFactory authzFactory, AuthorizationContext authzCtx) {
this.stmtTableCache = stmtTableCache;
@@ -725,10 +729,16 @@ public class Analyzer {
}
}
- public boolean isRegisteredTableRef(TableRef ref) {
- if (ref == null) return false;
- String uniqueAlias = ref.getUniqueAlias();
- return aliasMap_.containsKey(uniqueAlias);
+ /**
+ * Checks if a table ref has already been registered in this analyzer and returns it.
+ * Uses the unique alias from the table ref for the check. Returns null if the table
+ * ref has not been registered.
+ */
+ public TableRef getRegisteredTableRef(String uniqueAlias) {
+ if (uniqueAlias == null) return null;
+ TupleDescriptor tupleDesc = aliasMap_.get(uniqueAlias);
+ if (tupleDesc == null) return null;
+ return tableRefMap_.get(tupleDesc.getId());
}
/**
@@ -787,6 +797,10 @@ public class Analyzer {
tableRefMap_.put(desc.getId(), ref);
}
+ public void addAlias(String alias, TupleDescriptor desc) {
+ aliasMap_.put(alias, desc);
+ }
+
/**
* Resolves the given TableRef into a concrete BaseTableRef, ViewRef or
* CollectionTableRef. Returns the new resolved table ref or the given table
@@ -998,6 +1012,11 @@ public class Analyzer {
public void addZippingUnnestTupleId(CollectionTableRef tblRef) {
Expr collExpr = tblRef.getCollectionExpr();
+ addZippingUnnestTupleId(collExpr);
+ }
+
+ public void addZippingUnnestTupleId(Expr collExpr) {
+ if (collExpr == null) return;
if (!(collExpr instanceof SlotRef)) return;
SlotRef slotCollExpr = (SlotRef)collExpr;
SlotDescriptor collSlotDesc = slotCollExpr.getDesc();
@@ -1007,10 +1026,22 @@ public class Analyzer {
globalState_.zippingUnnestTupleIds.add(collTupleDesc.getId());
}
+ public void addZippingUnnestTupleId(TupleId tid) {
+ globalState_.zippingUnnestTupleIds.add(tid);
+ }
+
public Set<TupleId> getZippingUnnestTupleIds() {
return globalState_.zippingUnnestTupleIds;
}
+ public void increaseZippingUnnestCount() {
+ ++globalState_.numZippingUnnests;
+ }
+
+ public int getNumZippingUnnests() {
+ return globalState_.numZippingUnnests;
+ }
+
/**
* Returns the descriptor of the given explicit or implicit table alias or null if no
* such alias has been registered.
diff --git a/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java b/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java
index 962009ac1..f22bb2609 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java
@@ -100,6 +100,13 @@ public class CollectionTableRef extends TableRef {
if (resolvedPath_.getRootDesc() != null) {
sourceView = resolvedPath_.getRootDesc().getSourceView();
}
+ if (sourceView != null && zippingUnnestType_ ==
+ ZippingUnnestType.FROM_CLAUSE_ZIPPING_UNNEST) {
+ String implicitAlias = rawPath_.get(rawPath_.size() - 1).toLowerCase();
+ analyzer.addZippingUnnestTupleId(analyzer.getDescriptor(implicitAlias).getId());
+ TableRef existingTableRef = analyzer.getRegisteredTableRef(getUniqueAlias());
+ existingTableRef.getDesc().setHidden(false);
+ }
if (sourceView == null || inSelectList_) {
desc_ = analyzer.registerTableRef(this);
// Avoid polluting the namespace with collections that back arrays
@@ -195,6 +202,8 @@ public class CollectionTableRef extends TableRef {
public Expr getCollectionExpr() { return collectionExpr_; };
+ @Override
+ public boolean isCollectionInSelectList() { return inSelectList_; }
public void setInSelectList(boolean value) { inSelectList_ = value; }
@Override
diff --git a/fe/src/main/java/org/apache/impala/analysis/FromClause.java b/fe/src/main/java/org/apache/impala/analysis/FromClause.java
index f91dbfc70..dbe60aaf7 100644
--- a/fe/src/main/java/org/apache/impala/analysis/FromClause.java
+++ b/fe/src/main/java/org/apache/impala/analysis/FromClause.java
@@ -105,6 +105,7 @@ public class FromClause extends StmtNode implements Iterable<TableRef> {
}
if (firstZippingUnnestRef == null) firstZippingUnnestRef = tblRef;
analyzer.addZippingUnnestTupleId((CollectionTableRef)tblRef);
+ analyzer.increaseZippingUnnestCount();
}
}
}
@@ -146,6 +147,9 @@ public class FromClause extends StmtNode implements Iterable<TableRef> {
private void checkTopLevelComplexAcidScan(Analyzer analyzer,
CollectionTableRef collRef) {
if (collRef.getCollectionExpr() != null) return;
+ // Don't do any checks of the collection that came from a view as getTable() would
+ // return null in that case.
+ if (collRef.getTable() == null) return;
if (!AcidUtils.isFullAcidTable(
collRef.getTable().getMetaStoreTable().getParameters())) {
return;
diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
index f6d5f7b5c..409235933 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
@@ -26,6 +26,7 @@ import java.util.Map;
import java.util.Set;
import org.apache.impala.analysis.Path.PathType;
+import org.apache.impala.analysis.TableRef.ZippingUnnestType;
import org.apache.impala.authorization.Privilege;
import org.apache.impala.catalog.ArrayType;
import org.apache.impala.catalog.Column;
@@ -149,6 +150,8 @@ public class SelectStmt extends QueryStmt {
public boolean hasHavingClause() { return havingClause_ != null; }
public ExprSubstitutionMap getBaseTblSmap() { return baseTblSmap_; }
+ public void addToFromClause(TableRef ref) { fromClause_.add(ref); }
+
/**
* A simple limit statement has a limit but no order-by,
* group-by, aggregates or analytic functions. Joins are
@@ -287,6 +290,8 @@ public class SelectStmt extends QueryStmt {
analyzeWhereClause();
createSortInfo(analyzer_);
+ setZippingUnnestSlotRefsFromViews();
+
// Analyze aggregation-relevant components of the select block (Group By
// clause, select list, Order By clause), substitute AVG with SUM/COUNT,
// create the AggregationInfo, including the agg output tuple, and transform
@@ -446,29 +451,77 @@ public class SelectStmt extends QueryStmt {
"WHERE clause must not contain analytic expressions: " + e.toSql());
}
- // Don't allow a WHERE conjunct on an array item that is part of a zipping unnest.
- // In case there is only one zipping unnested array this restriction is not needed
- // as the UNNEST node has to handle a single array and it's safe to do the filtering
- // in the scanner.
- Set<TupleId> zippingUnnestTupleIds = analyzer_.getZippingUnnestTupleIds();
- if (zippingUnnestTupleIds.size() > 1) {
- for (Expr expr : whereClause_.getChildren()) {
- if (expr == null || !(expr instanceof SlotRef)) continue;
- SlotRef slotRef = (SlotRef)expr;
- for (TupleId tid : zippingUnnestTupleIds) {
- TupleDescriptor collTupleDesc = analyzer_.getTupleDesc(tid);
- // If there is no slot ref for the collection tuple then there is no need to
- // check.
- if (collTupleDesc.getSlots().size() == 0) continue;
- Preconditions.checkState(collTupleDesc.getSlots().size() == 1);
- if (slotRef.getDesc().equals(collTupleDesc.getSlots().get(0))) {
- throw new AnalysisException("Not allowed to add a filter on an unnested " +
- "array under the same select statement: " + expr.toSql());
+ verifyZippingUnnestSlots();
+
+ analyzer_.registerConjuncts(whereClause_, false);
+ }
+
+ /**
+ * Don't allow a WHERE conjunct on an array item that is part of a zipping unnest.
+ * In case there is only one zipping unnested array this restriction is not needed
+ * as the UNNEST node has to handle a single array and it's safe to do the filtering
+ * in the scanner.
+ */
+ private void verifyZippingUnnestSlots() throws AnalysisException {
+ if (analyzer_.getNumZippingUnnests() <= 1) return;
+ List<TupleId> zippingUnnestTupleIds = Lists.newArrayList(
+ analyzer_.getZippingUnnestTupleIds());
+
+ List<SlotRef> slotRefsInWhereClause = new ArrayList<>();
+ whereClause_.collect(SlotRef.class, slotRefsInWhereClause);
+ for (SlotRef slotRef : slotRefsInWhereClause) {
+ if (slotRef.isBoundByTupleIds(zippingUnnestTupleIds)) {
+ throw new AnalysisException("Not allowed to add a filter on an unnested " +
+ "array under the same select statement: " + slotRef.toSql());
+ }
+ }
+ }
+
+ /**
+ * When zipping unnest is performed using the SQL standard compliant syntax (where
+ * the unnest is in the FROM clause) the SlotRefs used for the zipping unnest are not
+ * UnnestExprs as with the other approach (where zipping unnest is in the select list)
+ * but regular SlotRefs. As a result they have to be marked so that later on anything
+ * specific for zipping unnest could be executed for them.
+ * This function identifies the SlotRefs that are for zipping unnesting and sets a
+ * flag for them. Note, only marks the SlotRefs that are originated form a view.
+ */
+ private void setZippingUnnestSlotRefsFromViews() {
+ for (TableRef tblRef : fromClause_.getTableRefs()) {
+ if (!tblRef.isFromClauseZippingUnnest()) continue;
+ Preconditions.checkState(tblRef instanceof CollectionTableRef);
+ ExprSubstitutionMap exprSubMap = getBaseTableSMapFromTableRef(tblRef);
+ if (exprSubMap == null) continue;
+
+ for (SelectListItem item : selectList_.getItems()) {
+ if (item.isStar()) continue;
+ Expr itemExpr = item.getExpr();
+ List<SlotRef> slotRefs = new ArrayList<>();
+ itemExpr.collect(SlotRef.class, slotRefs);
+ for (SlotRef slotRef : slotRefs) {
+ Expr subbedExpr = exprSubMap.get(slotRef);
+ if (subbedExpr == null || !(subbedExpr instanceof SlotRef)) continue;
+ SlotRef subbedSlotRef = (SlotRef)subbedExpr;
+ CollectionTableRef collTblRef = (CollectionTableRef)tblRef;
+ SlotRef collectionSlotRef = (SlotRef)collTblRef.getCollectionExpr();
+ // Check if 'slotRef' is originated from 'collectionSlotRef'.
+ if (subbedSlotRef.getDesc().getParent().getId() ==
+ collectionSlotRef.getDesc().getItemTupleDesc().getId()) {
+ slotRef.setIsZippingUnnest(true);
}
}
}
}
- analyzer_.registerConjuncts(whereClause_, false);
+ }
+
+ /**
+ * If 'tblRef' is originated from a view then returns the baseTblSmap from the view.
+ * Returns false otherwise.
+ */
+ private ExprSubstitutionMap getBaseTableSMapFromTableRef(TableRef tblRef) {
+ if (tblRef.getResolvedPath().getRootDesc() == null) return null;
+ if (tblRef.getResolvedPath().getRootDesc().getSourceView() == null) return null;
+ return tblRef.getResolvedPath().getRootDesc().getSourceView().getBaseTblSmap();
}
/**
@@ -522,7 +575,7 @@ public class SelectStmt extends QueryStmt {
continue;
// Don't push down the "is not empty" predicate for zipping unnests if there are
// multiple zipping unnests in the FROM clause.
- if (tblRef.isZippingUnnest() && analyzer_.getZippingUnnestTupleIds().size() > 1) {
+ if (tblRef.isZippingUnnest() && analyzer_.getNumZippingUnnests() > 1) {
continue;
}
IsNotEmptyPredicate isNotEmptyPred =
diff --git a/fe/src/main/java/org/apache/impala/analysis/SlotRef.java b/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
index c1409b7b6..27438b897 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
@@ -23,7 +23,6 @@ import java.util.Set;
import org.apache.impala.analysis.Path.PathType;
import org.apache.impala.catalog.FeFsTable;
-import org.apache.impala.catalog.ColumnStats;
import org.apache.impala.catalog.FeTable;
import org.apache.impala.catalog.HdfsFileFormat;
import org.apache.impala.catalog.StructField;
@@ -43,14 +42,19 @@ import com.google.common.base.Preconditions;
public class SlotRef extends Expr {
protected List<String> rawPath_;
- private final String label_; // printed in toSql()
+ protected final String label_; // printed in toSql()
// Results of analysis.
- private SlotDescriptor desc_;
+ protected SlotDescriptor desc_;
// The resolved path after resolving 'rawPath_'.
protected Path resolvedPath_ = null;
+ // Indicates if this SlotRef is coming from zipping unnest where the unest is given in
+ // the FROM clause. Note, when the unnest in in the select list then an UnnestExpr would
+ // be used instead of a SlotRef.
+ protected boolean isZippingUnnest_ = false;
+
public SlotRef(List<String> rawPath) {
super();
rawPath_ = rawPath;
@@ -103,6 +107,7 @@ public class SlotRef extends Expr {
}
label_ = other.label_;
desc_ = other.desc_;
+ isZippingUnnest_ = other.isZippingUnnest_;
}
/**
@@ -300,6 +305,8 @@ public class SlotRef extends Expr {
return desc_.getPath();
}
+ public void setIsZippingUnnest(boolean b) { isZippingUnnest_ = b; }
+
@Override
public String toSqlImpl(ToSqlOptions options) {
if (label_ != null) return label_;
@@ -366,6 +373,15 @@ public class SlotRef extends Expr {
@Override
public boolean isBoundByTupleIds(List<TupleId> tids) {
Preconditions.checkState(desc_ != null);
+ // If this SlotRef is coming from zipping unnest then try to do a similar check as
+ // UnnestExpr does.
+ if (isZippingUnnest_ && desc_.getParent() != null &&
+ desc_.getParent().getRootDesc() != null) {
+ TupleId parentId = desc_.getParent().getRootDesc().getId();
+ for (TupleId tid: tids) {
+ if (tid.equals(parentId)) return true;
+ }
+ }
for (TupleId tid: tids) {
if (tid.equals(desc_.getParent().getId())) return true;
}
diff --git a/fe/src/main/java/org/apache/impala/analysis/TableRef.java b/fe/src/main/java/org/apache/impala/analysis/TableRef.java
index 0a9b9f358..ed3d813df 100644
--- a/fe/src/main/java/org/apache/impala/analysis/TableRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/TableRef.java
@@ -403,6 +403,9 @@ public class TableRef extends StmtNode {
public boolean isAnalyzed() { return isAnalyzed_; }
public boolean isResolved() { return !getClass().equals(TableRef.class); }
+ public boolean isFromClauseZippingUnnest() {
+ return zippingUnnestType_ == ZippingUnnestType.FROM_CLAUSE_ZIPPING_UNNEST;
+ }
public boolean isZippingUnnest() {
return zippingUnnestType_ != ZippingUnnestType.NONE;
}
diff --git a/fe/src/main/java/org/apache/impala/analysis/UnnestExpr.java b/fe/src/main/java/org/apache/impala/analysis/UnnestExpr.java
index 6d63ff358..ec3355077 100644
--- a/fe/src/main/java/org/apache/impala/analysis/UnnestExpr.java
+++ b/fe/src/main/java/org/apache/impala/analysis/UnnestExpr.java
@@ -22,7 +22,6 @@ import org.apache.impala.analysis.TableRef.ZippingUnnestType;
import org.apache.impala.catalog.TableLoadingException;
import org.apache.impala.catalog.Type;
import org.apache.impala.common.AnalysisException;
-import org.apache.impala.thrift.TExprNode;
import com.google.common.base.Preconditions;
import java.util.ArrayList;
@@ -40,6 +39,15 @@ public class UnnestExpr extends SlotRef {
protected UnnestExpr(UnnestExpr other) {
super(other);
+ // Removing "item" from the end of the path is required so that re-analyze will work
+ // as well after adding "item" to the path in the first round of analysis here in the
+ // unnest.
+ removeItemFromPath();
+ rawPathWithoutItem_ = other.rawPathWithoutItem_;
+ }
+
+ protected UnnestExpr(SlotDescriptor desc) {
+ super(desc);
}
@Override
@@ -54,11 +62,13 @@ public class UnnestExpr extends SlotRef {
// find the corresponding CollectionTableRef during resolution.
Path resolvedPath = resolveAndVerifyRawPath(analyzer);
Preconditions.checkNotNull(resolvedPath);
+ if (!rawPathWithoutItem_.isEmpty()) rawPathWithoutItem_.clear();
rawPathWithoutItem_.addAll(rawPath_);
List<String> tableRefRawPath = constructRawPathForTableRef(resolvedPath);
Preconditions.checkNotNull(tableRefRawPath);
- createAndRegisterCollectionTableRef(tableRefRawPath, analyzer);
+ CollectionTableRef tblRef = createAndRegisterCollectionTableRef(tableRefRawPath,
+ analyzer);
// 'rawPath_' points to an array and we need a SlotRef to refer to the item of the
// array. Hence, adding "item" to the end of the path.
@@ -69,12 +79,14 @@ public class UnnestExpr extends SlotRef {
rawPath_ = rawPath_.subList(rawPath_.size() - 2, rawPath_.size());
}
super.analyzeImpl(analyzer);
+ Preconditions.checkState(tblRef.desc_.getSlots().size() == 1);
+ analyzer.addZippingUnnestTupleId(desc_.getParent().getId());
}
private void verifyTableRefs(Analyzer analyzer) throws AnalysisException {
for (TableRef ref : analyzer.getTableRefs().values()) {
if (ref instanceof CollectionTableRef) {
- if (!ref.isZippingUnnest()) {
+ if (!ref.isZippingUnnest() && !ref.isCollectionInSelectList()) {
throw new AnalysisException(
"Providing zipping and joining unnests together is not supported.");
} else if (ref.getZippingUnnestType() ==
@@ -119,22 +131,40 @@ public class UnnestExpr extends SlotRef {
return tableRefRawPath;
}
- private void createAndRegisterCollectionTableRef(List<String> tableRefRawPath,
- Analyzer analyzer) throws AnalysisException {
- TableRef tblRef = new TableRef(tableRefRawPath, null);
+ private CollectionTableRef createAndRegisterCollectionTableRef(
+ List<String> tableRefRawPath, Analyzer analyzer) throws AnalysisException {
+ String alias = "";
+ if (rawPath_ != null && rawPath_.size() > 0) {
+ alias = rawPath_.get(rawPath_.size() - 1);
+ }
+ TableRef tblRef = new TableRef(tableRefRawPath, alias);
tblRef = analyzer.resolveTableRef(tblRef);
Preconditions.checkState(tblRef instanceof CollectionTableRef);
tblRef.setZippingUnnestType(ZippingUnnestType.SELECT_LIST_ZIPPING_UNNEST);
- if (!analyzer.isRegisteredTableRef(tblRef)) {
+ TableRef existingTblRef = analyzer.getRegisteredTableRef(tblRef.getUniqueAlias());
+ if (existingTblRef == null) {
tblRef.analyze(analyzer);
- // This just registers the tbl ref to be added to the FROM clause because it's not
- // available here. Note, SelectStmt will add it to the FROM clause during analysis.
+ // This just registers the table ref in the analyzer to be added to the FROM clause
+ // because the FROM clause is not available here. Note, a query rewrite will add it
+ // eventually before a re-analysis.
analyzer.addTableRefFromUnnestExpr((CollectionTableRef)tblRef);
+ return (CollectionTableRef)tblRef;
+ } else if (existingTblRef.isCollectionInSelectList() ||
+ existingTblRef.getZippingUnnestType() == ZippingUnnestType.NONE) {
+ Preconditions.checkState(existingTblRef instanceof CollectionTableRef);
+ // This case happens when unnesting an array that comes from a view where the array
+ // is present in the select list of the view. Here the view has already registered
+ // the table ref to the analyzer.
+ existingTblRef.setZippingUnnestType(ZippingUnnestType.SELECT_LIST_ZIPPING_UNNEST);
+ analyzer.addTableRefFromUnnestExpr((CollectionTableRef)existingTblRef);
}
+ existingTblRef.setHidden(false);
+ existingTblRef.getDesc().setHidden(false);
+ return (CollectionTableRef)existingTblRef;
}
private void removeItemFromPath() {
- Preconditions.checkNotNull(rawPath_);
+ if (rawPath_ == null || rawPath_.isEmpty()) return;
if (rawPath_.get(rawPath_.size() - 1).equals("item")) {
rawPath_.remove(rawPath_.size() - 1);
}
@@ -145,6 +175,39 @@ public class UnnestExpr extends SlotRef {
@Override
public String toSqlImpl(ToSqlOptions options) {
- return "UNNEST(" + ToSqlUtils.getPathSql(rawPathWithoutItem_) + ")";
+ Preconditions.checkState(isAnalyzed());
+ String label = "";
+ if (rawPathWithoutItem_ != null) label = ToSqlUtils.getPathSql(rawPathWithoutItem_);
+ if (label.equals("") && label_ != null) {
+ // If 'rawPathWithoutItem_' is null then this slot is from a view and we have to
+ // use 'label_' for displaying purposes.
+ if (label_.endsWith(".item")) label = label_.substring(0, label_.length() - 5);
+ }
+ return "UNNEST(" + label + ")";
+ }
+
+ @Override
+ protected Expr substituteImpl(ExprSubstitutionMap smap, Analyzer analyzer) {
+ if (smap == null) return this;
+ SlotRef slotRef = new SlotRef(this.desc_);
+ Expr substExpr = smap.get(slotRef);
+ if (substExpr == null) {
+ UnnestExpr unnestExpr = new UnnestExpr(this.desc_);
+ substExpr = smap.get(unnestExpr);
+ if (substExpr == null) return this;
+ return substExpr;
+ }
+
+ return new UnnestExpr(((SlotRef)substExpr).getDesc());
+ }
+
+ @Override
+ public boolean isBoundByTupleIds(List<TupleId> tids) {
+ Preconditions.checkState(desc_ != null);
+ TupleId parentId = desc_.getParent().getRootDesc().getId();
+ for (TupleId tid: tids) {
+ if (tid.equals(parentId)) return true;
+ }
+ return super.isBoundByTupleIds(tids);
}
}
\ No newline at end of file
diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
index 9d33158f1..a5dba3831 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
@@ -948,8 +948,8 @@ public class HdfsScanNode extends ScanNode {
// FROM clause then avoid pushing down conjunct for this slot to the scanner as it
// would result incorrect results on that slot after performing the unnest.
// One exception is when there is only one such table reference in the FROM clause.
- Set<TupleId> zippingUnnestTupleIds = analyzer.getZippingUnnestTupleIds();
- if (zippingUnnestTupleIds.size() > 1 && zippingUnnestTupleIds.contains(itemTid)) {
+ if (analyzer.getNumZippingUnnests() > 1 &&
+ analyzer.getZippingUnnestTupleIds().contains(itemTid)) {
continue;
}
diff --git a/fe/src/main/java/org/apache/impala/planner/PlanNode.java b/fe/src/main/java/org/apache/impala/planner/PlanNode.java
index e745ca675..b59acedc3 100644
--- a/fe/src/main/java/org/apache/impala/planner/PlanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/PlanNode.java
@@ -21,6 +21,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
+import java.util.Iterator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
@@ -32,6 +33,7 @@ import org.apache.impala.analysis.Expr;
import org.apache.impala.analysis.ExprId;
import org.apache.impala.analysis.ExprSubstitutionMap;
import org.apache.impala.analysis.SlotDescriptor;
+import org.apache.impala.analysis.SlotRef;
import org.apache.impala.analysis.ToSqlOptions;
import org.apache.impala.analysis.TupleDescriptor;
import org.apache.impala.analysis.TupleId;
@@ -495,6 +497,30 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
msg.num_children = numChildren;
}
+ /**
+ * If there are more than one array to be zipping unnested in the 'analyzer' then
+ * removes the conjuncts related to the unnested array item from the 'conjuncts' list.
+ * This could be useful to prevent e.g. ScanNode or SingularRowSrc to pick up the
+ * conjuncts for zipping unnested arrays and let the UnnestNode to take care of them.
+ */
+ public static void removeZippingUnnestConjuncts(List<Expr> conjuncts,
+ Analyzer analyzer) {
+ Set<TupleId> zippingUnnestTupleIds = analyzer.getZippingUnnestTupleIds();
+
+ Iterator<Expr> it = conjuncts.iterator();
+ while(it.hasNext()) {
+ Expr e = it.next();
+ List<SlotRef> slotRefs = new ArrayList<>();
+ e.collect(SlotRef.class, slotRefs);
+ for (SlotRef slotRef : slotRefs) {
+ if (zippingUnnestTupleIds.contains(slotRef.getDesc().getParent().getId())) {
+ it.remove();
+ break;
+ }
+ }
+ }
+ }
+
/**
* Computes the full internal state, including smap and planner-relevant statistics
* (calls computeStats()), marks all slots referenced by this node as materialized
@@ -515,10 +541,15 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
*/
protected void assignConjuncts(Analyzer analyzer) {
List<Expr> unassigned = analyzer.getUnassignedConjuncts(this);
+ if (!shouldPickUpZippingUnnestConjuncts()) {
+ removeZippingUnnestConjuncts(unassigned, analyzer);
+ }
conjuncts_.addAll(unassigned);
analyzer.markConjunctsAssigned(unassigned);
}
+ protected boolean shouldPickUpZippingUnnestConjuncts() { return true; }
+
/**
* Apply the provided conjuncts to the this node, returning the new root of
* the plan tree. Also add any slot equivalences for tupleIds that have not
diff --git a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
index 46ab74914..734885fbe 100644
--- a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
+++ b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
@@ -1849,6 +1849,8 @@ public class SingleNodePlanner {
// Also add remaining unassigned conjuncts
List<Expr> unassigned = analyzer.getUnassignedConjuncts(tid.asList());
+ PlanNode.removeZippingUnnestConjuncts(unassigned, analyzer);
+
conjuncts.addAll(unassigned);
analyzer.markConjunctsAssigned(unassigned);
analyzer.createEquivConjuncts(tid, conjuncts);
diff --git a/fe/src/main/java/org/apache/impala/planner/SingularRowSrcNode.java b/fe/src/main/java/org/apache/impala/planner/SingularRowSrcNode.java
index e9c498417..bef43b783 100644
--- a/fe/src/main/java/org/apache/impala/planner/SingularRowSrcNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/SingularRowSrcNode.java
@@ -88,4 +88,7 @@ public class SingularRowSrcNode extends PlanNode {
protected void toThrift(TPlanNode msg) {
msg.node_type = TPlanNodeType.SINGULAR_ROW_SRC_NODE;
}
+
+ @Override
+ protected boolean shouldPickUpZippingUnnestConjuncts() { return false; }
}
diff --git a/fe/src/main/java/org/apache/impala/planner/UnnestNode.java b/fe/src/main/java/org/apache/impala/planner/UnnestNode.java
index 3aa7e8641..e5247af0f 100644
--- a/fe/src/main/java/org/apache/impala/planner/UnnestNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/UnnestNode.java
@@ -17,12 +17,14 @@
package org.apache.impala.planner;
+import java.util.Comparator;
import java.util.List;
import org.apache.impala.analysis.Analyzer;
import org.apache.impala.analysis.CollectionTableRef;
import org.apache.impala.analysis.Expr;
import org.apache.impala.analysis.SlotRef;
+import org.apache.impala.analysis.ToSqlUtils;
import org.apache.impala.common.ImpalaException;
import org.apache.impala.thrift.TExplainLevel;
import org.apache.impala.thrift.TPlanNode;
@@ -120,6 +122,11 @@ public class UnnestNode extends PlanNode {
protected String getDisplayLabelDetail() {
StringBuilder strBuilder = new StringBuilder();
boolean first = true;
+ tblRefs_.sort( (CollectionTableRef t1, CollectionTableRef t2) -> {
+ String path1 = ToSqlUtils.getPathSql(t1.getPath());
+ String path2 = ToSqlUtils.getPathSql(t2.getPath());
+ return path1.compareTo(path2);
+ });
for (CollectionTableRef tblRef : tblRefs_) {
if (!first) strBuilder.append(", ");
strBuilder.append(Joiner.on(".").join(tblRef.getPath()));
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index 2b8bccd21..fb395ea61 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -351,6 +351,14 @@ public class PlannerTest extends PlannerTestBase {
runPlannerTestFile("complex-types-file-formats");
}
+ @Test
+ public void testZippingUnnest() {
+ addTestDb("test_zipping_unnest_db", "For creating views for zipping unnest queries.");
+ addTestView("create view test_zipping_unnest_db.view_arrays as " +
+ "select id, arr1, arr2 from functional_parquet.complextypes_arrays");
+ runPlannerTestFile("zipping-unnest");
+ }
+
@Test
public void testJoins() {
TQueryOptions options = defaultQueryOptions();
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/zipping-unnest.test b/testdata/workloads/functional-planner/queries/PlannerTest/zipping-unnest.test
new file mode 100644
index 000000000..a9672637d
--- /dev/null
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/zipping-unnest.test
@@ -0,0 +1,206 @@
+# Check that for a single unnest the predicate on the unnested items is pushed to the
+# Scan node.
+select id, item from (
+ select id, unnest(arr1) from functional_parquet.complextypes_arrays) x
+where item < 5;
+---- PLAN
+PLAN-ROOT SINK
+|
+01:SUBPLAN
+| row-size=20B cardinality=13.51K
+|
+|--04:NESTED LOOP JOIN [CROSS JOIN]
+| | row-size=20B cardinality=10
+| |
+| |--02:SINGULAR ROW SRC
+| | row-size=16B cardinality=1
+| |
+| 03:UNNEST [functional_parquet.complextypes_arrays.arr1 arr1]
+| row-size=4B cardinality=10
+|
+00:SCAN HDFS [functional_parquet.complextypes_arrays]
+ HDFS partitions=1/1 files=1 size=1.06KB
+ predicates: !empty(functional_parquet.complextypes_arrays.arr1)
+ predicates on arr1: UNNEST(arr1) < 5, arr1.item < 5
+ row-size=16B cardinality=1.35K
+====
+# Similar as above but using the FROM clause syntax for zipping unnest.
+select id, unnest1 from (
+ select id, arr1.item as unnest1
+ from functional_parquet.complextypes_arrays a, unnest(a.arr1)) x
+where unnest1 < 5;
+---- PLAN
+PLAN-ROOT SINK
+|
+01:SUBPLAN
+| row-size=20B cardinality=13.51K
+|
+|--04:NESTED LOOP JOIN [CROSS JOIN]
+| | row-size=20B cardinality=10
+| |
+| |--02:SINGULAR ROW SRC
+| | row-size=16B cardinality=1
+| |
+| 03:UNNEST [a.arr1]
+| row-size=4B cardinality=10
+|
+00:SCAN HDFS [functional_parquet.complextypes_arrays a]
+ HDFS partitions=1/1 files=1 size=1.06KB
+ predicates: !empty(a.arr1)
+ predicates on a.arr1: arr1.item < 5
+ row-size=16B cardinality=1.35K
+====
+# Check that for a single unnest the predicate on the unnested items is pushed to the
+# Scan node. Queries a view instead of a table.
+select id, item from (
+ select id, unnest(arr1) from test_zipping_unnest_db.view_arrays) x
+where item < 5;
+---- PLAN
+PLAN-ROOT SINK
+|
+01:SUBPLAN
+| row-size=20B cardinality=13.51K
+|
+|--04:NESTED LOOP JOIN [CROSS JOIN]
+| | row-size=20B cardinality=10
+| |
+| |--02:SINGULAR ROW SRC
+| | row-size=16B cardinality=1
+| |
+| 03:UNNEST [test_zipping_unnest_db.view_arrays.arr1]
+| row-size=4B cardinality=10
+|
+00:SCAN HDFS [functional_parquet.complextypes_arrays]
+ HDFS partitions=1/1 files=1 size=1.06KB
+ predicates: !empty(arr1)
+ predicates on arr1: UNNEST(arr1) < 5, arr1.item < 5
+ row-size=16B cardinality=1.35K
+====
+# Similar as above but using the FROM clause syntax for zipping unnest.
+select id, unnest1 from (
+ select id, arr1.item as unnest1
+ from test_zipping_unnest_db.view_arrays a, unnest(a.arr1)) x
+where unnest1 < 5;
+---- PLAN
+PLAN-ROOT SINK
+|
+01:SUBPLAN
+| row-size=20B cardinality=13.51K
+|
+|--04:NESTED LOOP JOIN [CROSS JOIN]
+| | row-size=20B cardinality=10
+| |
+| |--02:SINGULAR ROW SRC
+| | row-size=16B cardinality=1
+| |
+| 03:UNNEST [a.arr1]
+| row-size=4B cardinality=10
+|
+00:SCAN HDFS [functional_parquet.complextypes_arrays]
+ HDFS partitions=1/1 files=1 size=1.06KB
+ predicates: !empty(arr1)
+ predicates on arr1: arr1.item < 5
+ row-size=16B cardinality=1.35K
+====
+# Check that for multiple unnests the predicate on the unnested items is pushed to the
+# Unnest node instead of the Scan node.
+select id, unnest1 from (
+ select id, unnest(arr1) unnest1, unnest(arr2) unnest2
+ from functional_parquet.complextypes_arrays) x
+where unnest1 < 5;
+---- PLAN
+PLAN-ROOT SINK
+|
+01:SUBPLAN
+| row-size=20B cardinality=13.51K
+|
+|--04:NESTED LOOP JOIN [CROSS JOIN]
+| | row-size=20B cardinality=10
+| |
+| |--02:SINGULAR ROW SRC
+| | row-size=16B cardinality=1
+| |
+| 03:UNNEST [functional_parquet.complextypes_arrays.arr1 arr1, functional_parquet.complextypes_arrays.arr2 arr2]
+| predicates: UNNEST(arr1) < 5
+| row-size=4B cardinality=10
+|
+00:SCAN HDFS [functional_parquet.complextypes_arrays]
+ HDFS partitions=1/1 files=1 size=1.06KB
+ row-size=16B cardinality=1.35K
+====
+# Similar as above but using the FROM clause syntax for zipping unnest.
+select id, unnest1 from (
+ select id, arr1.item as unnest1, arr2.item as unnest2
+ from functional_parquet.complextypes_arrays a, unnest(a.arr1, a.arr2)) x
+where unnest1 < 5;
+---- PLAN
+PLAN-ROOT SINK
+|
+01:SUBPLAN
+| row-size=20B cardinality=13.51K
+|
+|--04:NESTED LOOP JOIN [CROSS JOIN]
+| | row-size=20B cardinality=10
+| |
+| |--02:SINGULAR ROW SRC
+| | row-size=16B cardinality=1
+| |
+| 03:UNNEST [a.arr1, a.arr2]
+| predicates: arr1.item < 5
+| row-size=4B cardinality=10
+|
+00:SCAN HDFS [functional_parquet.complextypes_arrays a]
+ HDFS partitions=1/1 files=1 size=1.06KB
+ row-size=16B cardinality=1.35K
+====
+# Check that for multiple unnests the predicate on the unnested items is pushed to the
+# Unnest node instead of the Scan node. Queries a view instead of a table.
+select id, unnest1 from (
+ select id, unnest(arr1) unnest1, unnest(arr2) unnest2
+ from test_zipping_unnest_db.view_arrays) x
+where unnest1 < 5;
+---- PLAN
+PLAN-ROOT SINK
+|
+01:SUBPLAN
+| row-size=20B cardinality=13.51K
+|
+|--04:NESTED LOOP JOIN [CROSS JOIN]
+| | row-size=20B cardinality=10
+| |
+| |--02:SINGULAR ROW SRC
+| | row-size=16B cardinality=1
+| |
+| 03:UNNEST [test_zipping_unnest_db.view_arrays.arr1, test_zipping_unnest_db.view_arrays.arr2]
+| predicates: UNNEST(arr1) < 5
+| row-size=4B cardinality=10
+|
+00:SCAN HDFS [functional_parquet.complextypes_arrays]
+ HDFS partitions=1/1 files=1 size=1.06KB
+ row-size=16B cardinality=1.35K
+====
+# Similar as above but using the FROM clause syntax for zipping unnest.
+select id, unnest1 from (
+ select id, arr1.item as unnest1, arr2.item as unnest2
+ from test_zipping_unnest_db.view_arrays a, unnest(a.arr1, a.arr2)) x
+where unnest1 < 5;
+---- PLAN
+PLAN-ROOT SINK
+|
+01:SUBPLAN
+| row-size=20B cardinality=13.51K
+|
+|--04:NESTED LOOP JOIN [CROSS JOIN]
+| | row-size=20B cardinality=10
+| |
+| |--02:SINGULAR ROW SRC
+| | row-size=16B cardinality=1
+| |
+| 03:UNNEST [a.arr1, a.arr2]
+| predicates: arr1.item < 5
+| row-size=4B cardinality=10
+|
+00:SCAN HDFS [functional_parquet.complextypes_arrays]
+ HDFS partitions=1/1 files=1 size=1.06KB
+ row-size=16B cardinality=1.35K
+====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-from-view.test b/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-from-view.test
index 63cd79c8b..0456db078 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-from-view.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-from-view.test
@@ -1,5 +1,156 @@
====
---- QUERY
+create view view_arrays as
+ select id, arr1, arr2 from functional_parquet.complextypes_arrays;
+---- RESULTS
+'View has been created.'
+====
+---- QUERY
+# Unnest arrays from a view where there are no filters.
+select id, unnest(arr1), unnest(arr2) from view_arrays;
+---- RESULTS
+1,1,'one'
+1,2,'two'
+1,3,'three'
+1,4,'four'
+1,5,'five'
+2,1,'one'
+2,NULL,'two'
+2,3,'three'
+2,4,'NULL'
+2,5,'five'
+3,10,'ten'
+3,9,'NULL'
+3,8,'NULL'
+4,10,'ten'
+4,NULL,'nine'
+4,NULL,'eight'
+5,10,'ten'
+5,NULL,'eleven'
+5,12,'twelve'
+5,NULL,'thirteen'
+6,NULL,'str1'
+6,NULL,'str2'
+7,1,'NULL'
+7,2,'NULL'
+9,NULL,'str1'
+9,NULL,'str2'
+10,1,'NULL'
+10,2,'NULL'
+10,3,'NULL'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Unnest arrays from a view where there is a filter on an outer SELECT.
+# Note, filtering by the unnested items has to happen on the array's item.
+select id, unnest1, unnest2 from (
+ select id, unnest(arr1) as unnest1, unnest(arr2) as unnest2 from view_arrays) x
+where unnest1 < 3;
+---- RESULTS
+1,1,'one'
+1,2,'two'
+2,1,'one'
+7,1,'NULL'
+7,2,'NULL'
+10,1,'NULL'
+10,2,'NULL'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Similar as above but here the unnested field to be filtered by doesn't have an alias.
+select id, unnest1, item from (
+ select id, unnest(arr1) as unnest1, unnest(arr2) from view_arrays) x
+where item is null;
+---- RESULTS
+2,4,'NULL'
+3,9,'NULL'
+3,8,'NULL'
+7,1,'NULL'
+7,2,'NULL'
+10,1,'NULL'
+10,2,'NULL'
+10,3,'NULL'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Giving a filter on an unnested array is allowed if there is only one unnest in the
+# query.
+select id, unnest(arr1) from view_arrays
+where arr1.item > 5;
+---- RESULTS
+3,10
+3,9
+3,8
+4,10
+5,10
+5,12
+---- TYPES
+INT,INT
+====
+---- QUERY
+# Giving a filter on an unnested array is not allowed if there are multiple unnests in the
+# query.
+select id, unnest(arr1), unnest(arr2) from view_arrays
+where arr1.item > 5;
+---- CATCH
+AnalysisException: Not allowed to add a filter on an unnested array under the same select statement: arr1.item
+====
+---- QUERY
+create view view_arrays_with_filter as
+ select id, arr1, arr2 from functional_orc_def.complextypes_arrays where id < 5;
+---- RESULTS
+'View has been created.'
+====
+---- QUERY
+# Unnest arrays from a view where there is a filter within the view.
+select id, unnest(arr1), unnest(arr2) from view_arrays_with_filter;
+---- RESULTS
+1,1,'one'
+1,2,'two'
+1,3,'three'
+1,4,'four'
+1,5,'five'
+2,1,'one'
+2,NULL,'two'
+2,3,'three'
+2,4,'NULL'
+2,5,'five'
+3,10,'ten'
+3,9,'NULL'
+3,8,'NULL'
+4,10,'ten'
+4,NULL,'nine'
+4,NULL,'eight'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Unnest arrays from a view where there are filters both within the view and on the outer
+# select.
+select id, unnest1, unnest2 from (
+ select id, unnest(arr1) unnest1, unnest(arr2) unnest2 from view_arrays_with_filter) x
+where unnest2 is not null;
+---- RESULTS
+1,1,'one'
+1,2,'two'
+1,3,'three'
+1,4,'four'
+1,5,'five'
+2,1,'one'
+2,NULL,'two'
+2,3,'three'
+2,5,'five'
+3,10,'ten'
+4,10,'ten'
+4,NULL,'nine'
+4,NULL,'eight'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
create view view_unnested_arrays as
select id, unnest(arr1) as arr1_unnested, unnest(arr2) as arr2_unnested
from functional_orc_def.complextypes_arrays;
@@ -45,12 +196,287 @@ INT,INT,STRING
---- QUERY
# Same as above but there is a filter in the outer select.
select id, arr1_unnested, arr2_unnested from view_unnested_arrays
-where arr1_unnested > 5;
+where arr1_unnested >= 5 and length(arr2_unnested) > 3;
+---- RESULTS
+1,5,'five'
+2,5,'five'
+5,12,'twelve'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Create a wrapper view around an existing view that in turn contains arrays.
+# Adds an additional filter on the underlying view.
+create view nested_view_arrays as
+ select id, arr1, arr2 from view_arrays
+ where id % 2 = 1;
+---- RESULTS
+'View has been created.'
+====
+---- QUERY
+# Do an unnest where the array is in a nested view.
+select id, unnest(arr1), unnest(arr2) from nested_view_arrays;
+---- RESULTS
+1,1,'one'
+1,2,'two'
+1,3,'three'
+1,4,'four'
+1,5,'five'
+3,10,'ten'
+3,9,'NULL'
+3,8,'NULL'
+5,10,'ten'
+5,NULL,'eleven'
+5,12,'twelve'
+5,NULL,'thirteen'
+7,1,'NULL'
+7,2,'NULL'
+9,NULL,'str1'
+9,NULL,'str2'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# In this query the array has different unique aliases in the different levels:
+# 'x.arr1', 'arr1'
+select id, unnest(arr1) unnest1, unnest(arr2) from (
+ select id, arr1, arr2 from nested_view_arrays where id < 5) x;
---- RESULTS
+1,1,'one'
+1,2,'two'
+1,3,'three'
+1,4,'four'
+1,5,'five'
+3,10,'ten'
+3,9,'NULL'
+3,8,'NULL'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Similar as above but apply an additional filter on the select. The filter is on a
+# non-unnested column.
+select id, unnest(arr1), unnest(arr2) from nested_view_arrays
+where id > 4;
+---- RESULTS
+5,10,'ten'
+5,NULL,'eleven'
+5,12,'twelve'
+5,NULL,'thirteen'
+7,1,'NULL'
+7,2,'NULL'
+9,NULL,'str1'
+9,NULL,'str2'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Similar as above but here the filter is on a column that is being unnested in the same
+# select. This is not allowed.
+select id, unnest(arr1), unnest(arr2) from nested_view_arrays
+where arr1.item = 4;
+---- CATCH
+AnalysisException: Not allowed to add a filter on an unnested array under the same select statement: arr1.item
+====
+---- QUERY
+# Similar as above but here the filter is in an outer select.
+select id, unnest1, unnest2 from (
+ select id, unnest(arr1) unnest1, unnest(arr2) unnest2 from nested_view_arrays) x
+where unnest1 = 4;
+---- RESULTS
+1,4,'four'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Create a wrapper view around an existing view that in turn contains arrays.
+# This wrapper view unnests the arrays from the underlying view.
+create view nested_view_unnested_arrays as
+ select id, unnest(arr1) as arr1_unnested, unnest(arr2) as arr2_unnested
+ from view_arrays
+ where id % 2 = 1;
+---- RESULTS
+'View has been created.'
+====
+---- QUERY
+# Query the nested view where one view gives the arrays and the other unnests them.
+select id, arr1_unnested, arr2_unnested from nested_view_unnested_arrays;
+---- RESULTS
+1,1,'one'
+1,2,'two'
+1,3,'three'
+1,4,'four'
+1,5,'five'
+3,10,'ten'
+3,9,'NULL'
+3,8,'NULL'
+5,10,'ten'
+5,NULL,'eleven'
+5,12,'twelve'
+5,NULL,'thirteen'
+7,1,'NULL'
+7,2,'NULL'
+9,NULL,'str1'
+9,NULL,'str2'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Similar as above but with a where filter on the outermost select.
+select id, arr1_unnested, arr2_unnested from nested_view_unnested_arrays
+where arr1_unnested > 7;
+---- RESULTS
+3,10,'ten'
+3,9,'NULL'
+3,8,'NULL'
+5,10,'ten'
+5,12,'twelve'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# The queries below will test the syntax where the unnest is in the from clause.
+# WHERE filter is not allowed when multiple arrays are being unnested in the same SELECT
+# statement.
+select id, arr1.item from
+ view_arrays va, unnest(va.arr1, va.arr2)
+where arr1.item < 3;
+---- CATCH
+AnalysisException: Not allowed to add a filter on an unnested array under the same select statement: arr1.item
+====
+---- QUERY
+# Similar as above but here only one array is unnested, hence the WHERE filter is allowed.
+select id, arr1.item from
+ view_arrays va, unnest(va.arr1)
+where arr1.item < 3;
+---- RESULTS
+1,1
+1,2
+2,1
+7,1
+7,2
+10,1
+10,2
+---- TYPES
+INT,INT
+====
+---- QUERY
+# An outer select has a WHERE filter while an inner select unnests multiple arrays.
+select id, unnest1, unnest2 from
+ (select id, arr1.item as unnest1, arr2.item as unnest2 from
+ functional_parquet.complextypes_arrays va,
+ unnest(va.arr1, va.arr2)) x
+where unnest1 < 3;
+---- RESULTS
+1,1,'one'
+1,2,'two'
+2,1,'one'
+7,1,'NULL'
+7,2,'NULL'
+10,1,'NULL'
+10,2,'NULL'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Unnest arrays from a view where there is a filter within the view.
+select id, arr1.item, arr2.item from view_arrays_with_filter va, unnest(va.arr1, va.arr2);
+---- RESULTS
+1,1,'one'
+1,2,'two'
+1,3,'three'
+1,4,'four'
+1,5,'five'
+2,1,'one'
+2,NULL,'two'
+2,3,'three'
+2,4,'NULL'
+2,5,'five'
3,10,'ten'
3,9,'NULL'
3,8,'NULL'
4,10,'ten'
+4,NULL,'nine'
+4,NULL,'eight'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+create view view_unnested_arrays2 as
+ select id, arr1.item as arr1_unnested, arr2.item as arr2_unnested
+ from functional_orc_def.complextypes_arrays va, unnest(va.arr1, va.arr2);
+---- RESULTS
+'View has been created.'
+====
+---- QUERY
+# Query from a view that does the unnesting itself. Apply a filter on top of the view.
+select id, arr1_unnested, arr2_unnested from view_unnested_arrays2
+where arr1_unnested >= 5 and length(arr2_unnested) > 3;
+---- RESULTS
+1,5,'five'
+2,5,'five'
+5,12,'twelve'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Unnest arrays that are coming from nested views.
+select id, arr1.item, arr2.item from nested_view_arrays va, unnest(va.arr1, va.arr2)
+where id > 4;
+---- RESULTS
+5,10,'ten'
+5,NULL,'eleven'
+5,12,'twelve'
+5,NULL,'thirteen'
+7,1,'NULL'
+7,2,'NULL'
+9,NULL,'str1'
+9,NULL,'str2'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Create a wrapper view around an existing view that in turn contains arrays.
+# This wrapper view unnests the arrays from the underlying view using the syntax where
+# the unnest is in the from clause.
+create view nested_view_unnested_arrays2 as
+ select id, arr1.item as arr1_unnested, arr2.item as arr2_unnested
+ from view_arrays va, unnest(va.arr1, va.arr2)
+ where id % 2 = 1;
+---- RESULTS
+'View has been created.'
+====
+---- QUERY
+# Query the nested view where one view gives the arrays and the other unnests them.
+select id, arr1_unnested, arr2_unnested from nested_view_unnested_arrays2;
+---- RESULTS
+1,1,'one'
+1,2,'two'
+1,3,'three'
+1,4,'four'
+1,5,'five'
+3,10,'ten'
+3,9,'NULL'
+3,8,'NULL'
+5,10,'ten'
+5,NULL,'eleven'
+5,12,'twelve'
+5,NULL,'thirteen'
+7,1,'NULL'
+7,2,'NULL'
+9,NULL,'str1'
+9,NULL,'str2'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# Similar as above but with a where filter on the outermost select.
+select id, arr1_unnested, arr2_unnested from nested_view_unnested_arrays2
+where arr1_unnested > 7;
+---- RESULTS
+3,10,'ten'
+3,9,'NULL'
+3,8,'NULL'
5,10,'ten'
5,12,'twelve'
---- TYPES
diff --git a/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-select-list.test b/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-select-list.test
index 64a9621e7..2cbeea134 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-select-list.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/zipping-unnest-in-select-list.test
@@ -156,7 +156,7 @@ AnalysisException: Providing zipping and joining unnests together is not support
---- QUERY
# Zipping and joining unnests are given together where a single table ref is in the FROM
# clause with absolute path.
-select unnest(a) from complextypestbl.int_array_array;
+select unnest(item) from complextypestbl.int_array_array;
---- CATCH
AnalysisException: Providing zipping and joining unnests together is not supported.
====
@@ -173,6 +173,24 @@ select unnest(arr1) from complextypes_arrays t, unnest(t.arr2);
AnalysisException: Providing zipping unnest both in the SELECT list and in the FROM clause is not supported.
====
---- QUERY
+# WHERE filter on an unnested array in the same SELECT statement is not allowed.
+select id, unnest(arr1), unnest(arr2) from complextypes_arrays where arr1.item = 1;
+---- CATCH
+AnalysisException: Not allowed to add a filter on an unnested array under the same select statement: arr1.item
+====
+---- QUERY
+select id, unnest(arr1), unnest(arr2) from complextypes_arrays
+where id < 3 and length(arr2.item) < 5;
+---- CATCH
+AnalysisException: Not allowed to add a filter on an unnested array under the same select statement: arr2.item
+====
+---- QUERY
+select id, unnest(arr1), unnest(arr2) from complextypes_arrays
+where 3 * arr1.item > 8;
+---- CATCH
+AnalysisException: Not allowed to add a filter on an unnested array under the same select statement: arr1.item
+====
+---- QUERY
# Do an unnest on the outer layer of a nested array.
select unnest(int_array_array) from complextypestbl;
---- RESULTS
@@ -196,3 +214,19 @@ STRING
select unnest(int_array_array.item) from complextypestbl;
---- CATCH
AnalysisException: Illegal column/field reference 'int_array_array.item' with intermediate collection 'int_array_array' of type 'ARRAY<ARRAY<INT>>'
+====
+---- QUERY
+# Unnest the same array multiple times. Check that the where filter is still allowed even
+# though there are multiple unnest, but they are for the same array.
+select id, unnest(arr1), unnest(arr1) from complextypes_arrays
+where arr1.item < 3;
+---- RESULTS
+1,1,1
+1,2,2
+2,1,1
+7,1,1
+7,2,2
+10,1,1
+10,2,2
+---- TYPES
+INT,INT,INT