You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by db...@apache.org on 2022/05/02 07:26:26 UTC

[impala] branch master updated: IMPALA-10838: Fix substitution and improve unification of struct slots

This is an automated email from the ASF dual-hosted git repository.

dbecker pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 9baf79060 IMPALA-10838: Fix substitution and improve unification of struct slots
9baf79060 is described below

commit 9baf790606073d88c3a2fd431110812140df0cb7
Author: Daniel Becker <da...@cloudera.com>
AuthorDate: Wed Sep 15 09:51:21 2021 +0200

    IMPALA-10838: Fix substitution and improve unification of struct slots
    
    The following query fails:
    '''
    with sub as (
        select id, outer_struct
        from functional_orc_def.complextypes_nested_structs)
    select sub.id, sub.outer_struct.inner_struct2 from sub;
    '''
    
    with the following error:
    '''
    ERROR: IllegalStateException: Illegal reference to non-materialized
    tuple: debugname=InlineViewRef sub alias=sub tid=6
    '''
    
    while if 'outer_struct.inner_struct2' is added to the select list of the
    inline view, the query works as expected.
    
    This change fixes the problem by two modifications:
      - if a field of a struct needs to be materialised, also materialise
        all of its enclosing structs (ancestors)
      - in InlineViewRef, struct fields are inserted into the 'smap' and
        'baseTableSmap' with the appropriate inline view prefix
    
    This change also changes the way struct fields are materialised: until
    now, if a member of a struct was needed to be materialised, the whole
    struct, including other members of the struct were materialised. This
    behaviour can lead to using significantly more memory than necessary if
    we for example query a single member of a large struct. This change
    modifies this behaviour so that we only materialise the struct members
    that are actually needed.
    
    Tests:
      - added queries that are fixed by this change (including the one
        above) in nested-struct-in-select-list.test
      - added a planner test in
        fe/src/test/java/org/apache/impala/planner/PlannerTest.java that
        asserts that only the required parts of structs are materialised
    
    Change-Id: Iadb9233677355b85d424cc3f22b00b5a3bf61c57
    Reviewed-on: http://gerrit.cloudera.org:8080/17847
    Reviewed-by: Daniel Becker <da...@cloudera.com>
    Tested-by: Daniel Becker <da...@cloudera.com>
---
 .../java/org/apache/impala/analysis/Analyzer.java  | 218 +++++++++++++++--
 .../apache/impala/analysis/CollectionTableRef.java |   4 +-
 .../apache/impala/analysis/DescriptorTable.java    |  19 +-
 .../main/java/org/apache/impala/analysis/Expr.java |  12 +-
 .../impala/analysis/ExprSubstitutionMap.java       |  27 ++-
 .../org/apache/impala/analysis/InlineViewRef.java  |  85 ++++++-
 .../main/java/org/apache/impala/analysis/Path.java |  20 ++
 .../org/apache/impala/analysis/SelectStmt.java     |  76 ++++++
 .../org/apache/impala/analysis/SlotDescriptor.java |  60 +++++
 .../java/org/apache/impala/analysis/SlotRef.java   | 111 ++++-----
 .../java/org/apache/impala/analysis/SortInfo.java  |   4 +-
 .../apache/impala/analysis/TupleDescriptor.java    |  20 +-
 .../org/apache/impala/analysis/AnalyzerTest.java   |   3 +-
 .../org/apache/impala/planner/PlannerTest.java     |  78 ++++++
 .../org/apache/impala/planner/PlannerTestBase.java |  25 ++
 .../QueryTest/nested-struct-in-select-list.test    | 261 +++++++++++++++++++++
 16 files changed, 917 insertions(+), 106 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index 03a35d419..d9dad77ae 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -30,8 +30,11 @@ import java.util.LinkedHashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.Optional;
 import java.util.Set;
 import java.util.function.Function;
+import java.util.stream.Stream;
+import java.util.stream.Collectors;
 
 import org.apache.impala.analysis.Path.PathType;
 import org.apache.impala.analysis.StmtMetadataLoader.StmtTableCache;
@@ -58,6 +61,8 @@ import org.apache.impala.catalog.FeTable;
 import org.apache.impala.catalog.FeView;
 import org.apache.impala.catalog.KuduTable;
 import org.apache.impala.catalog.MaterializedViewHdfsTable;
+import org.apache.impala.catalog.StructField;
+import org.apache.impala.catalog.StructType;
 import org.apache.impala.catalog.TableLoadingException;
 import org.apache.impala.catalog.Type;
 import org.apache.impala.catalog.local.LocalKuduTable;
@@ -612,8 +617,8 @@ public class Analyzer {
   private final Set<String> ambiguousAliases_ = new HashSet<>();
 
   // Map from lowercase fully-qualified path to its slot descriptor. Only contains paths
-  // that have a scalar type as destination (see registerSlotRef()).
-  private final Map<String, SlotDescriptor> slotPathMap_ = new HashMap<>();
+  // that have a scalar or struct type as destination (see registerSlotRef()).
+  private final Map<List<String>, SlotDescriptor> slotPathMap_ = new HashMap<>();
 
   // Indicates whether this analyzer/block is guaranteed to have an empty result set
   // due to a limit 0 or constant conjunct evaluating to false.
@@ -1114,9 +1119,9 @@ public class Analyzer {
   }
 
   /**
-   * Given a "table alias"."column alias", return the SlotDescriptor
+   * Given a list of {"table alias", "column alias"}, return the SlotDescriptor.
    */
-  public SlotDescriptor getSlotDescriptor(String qualifiedColumnName) {
+  public SlotDescriptor getSlotDescriptor(List<String> qualifiedColumnName) {
     return slotPathMap_.get(qualifiedColumnName);
   }
 
@@ -1433,27 +1438,202 @@ public class Analyzer {
     if (slotPath.destType().isCollectionType() && duplicateIfCollections) {
       // Register a new slot descriptor for collection types. The BE currently
       // relies on this behavior for setting unnested collection slots to NULL.
-      return registerNewSlotRef(slotPath);
+      return createAndRegisterSlotDesc(slotPath, false);
     }
-    // SlotRefs with a scalar or struct types are registered against the slot's
+    // SlotRefs with scalar or struct types are registered against the slot's
     // fully-qualified lowercase path.
-    String key = slotPath.toString();
-    Preconditions.checkState(key.equals(key.toLowerCase()),
+    List<String> key = slotPath.getFullyQualifiedRawPath();
+    Preconditions.checkState(key.stream().allMatch(s -> s.equals(s.toLowerCase())),
         "Slot paths should be lower case: " + key);
     SlotDescriptor existingSlotDesc = slotPathMap_.get(key);
     if (existingSlotDesc != null) return existingSlotDesc;
-    SlotDescriptor result = null;
+
     if (slotPath.destType().isArrayType()) {
-      result = registerArraySlotRef(slotPath);
-    } else {
-      result = addSlotDescriptor(slotPath.getRootDesc());
+      SlotDescriptor result = registerArraySlotRef(slotPath);
+      result.setPath(slotPath);
+      slotPathMap_.put(slotPath.getFullyQualifiedRawPath(), result);
+      registerColumnPrivReq(result);
+      return result;
+    }
+
+    final int highestAncestorDistance = getHighestLevelAncestorDistance(slotPath);
+    if (highestAncestorDistance > 0 && isOnlyWithinStructs(slotPath)) {
+      // 'slotPath' is within a struct that is also needed in the query.
+      return registerSlotRefWithinStruct(slotPath, highestAncestorDistance);
+    }
+
+    Preconditions.checkState(highestAncestorDistance == 0);
+    if (slotPath.destType().isStructType()) {
+      // 'slotPath' refers to a struct that has no ancestor that is also needed in the
+      // query. We also create the child slot descriptors.
+      SlotDescriptor result = createAndRegisterSlotDesc(slotPath, true);
+      createStructTuplesAndSlotDescs(result);
+      return result;
+    }
+
+    SlotDescriptor result = createAndRegisterSlotDesc(slotPath, true);
+    return result;
+  }
+
+  // Returns whether all ancestors of 'slotPath' are structs (and for example not arrays).
+  private static boolean isOnlyWithinStructs(Path slotPath) {
+    List<Type> matchedTypes = slotPath.getMatchedTypes();
+    List<Type> ancestorTypes = matchedTypes.subList(0, matchedTypes.size() - 1);
+    for (Type ancestorType : ancestorTypes) {
+      if (!ancestorType.isStructType()) return false;
+    }
+    return true;
+  }
+
+  /**
+   * If there is
+   *   a) a struct expression and
+   *   b) another expression that is a (possibly multiply) embedded field in the
+   *      struct expresssion
+   * in the select list or any other part of the query (WHERE clause, sorting etc.), we
+   * would like the tuple memory corresponding to the embedded expression to be shared
+   * between the struct expression and the embedded expression. Therefore, if we get the
+   * path of the embedded expression, we should return a slot descriptor that is within
+   * the tree of the struct expression.
+   *
+   * This is easy if we encounter the path of the struct expression first and the path of
+   * the embedded expression later: when we encounter the path of the embedded expression
+   * we simply traverse the tree of slot and tuple descriptors belonging to the struct
+   * expression and return the 'SlotDescriptor' corresponding to the embedded expression.
+   *
+   * If the order was reversed, the 'SlotDescriptor' for the ancestor struct wouldn't yet
+   * exist at the time we encountered the embedded expression. Therefore, in the
+   * 'SelectStmt' class we make sure that all struct paths are registered in descending
+   * order of raw path length (meaning the number of path elements, not string length):
+   * see 'SelectStmt.registerStructSlotRefPathsWithAnalyzer()'.
+   *
+   * If we encounter a path that is within a struct, we find the highest level enclosing
+   * struct that is needed in the query. This is not necessarily the top level struct
+   * enclosing the embedded field as that may not be used in the query - take the
+   * following example:
+   *     outer: struct<middle: struct<inner: struct<field: int>>>
+   * and suppose that 'outer' is not present in the query but 'field' and 'middle' are
+   * both in the select list. The highest level enclosing struct for 'field' that is
+   * present in the query is 'middle' in this case.
+   *
+   * We traverse the tree of the highest level enclosing struct expression ('middle' in
+   * the example) to find the 'SlotDescriptor' corresponding to the original path ('field'
+   * in the example) and return it.
+   *
+   * The parameter 'slotPath' is the path that we want to register and return the
+   * 'SlotDescriptor' for; 'highestAncestorDistance' is the distance in the expression
+   * tree from 'slotPath' to the highest level ancestor present in the query: in the
+   * example it is the distance from 'field' to 'middle', which is 1.
+   */
+  private SlotDescriptor registerSlotRefWithinStruct(Path slotPath,
+      int highestAncestorDistance) throws AnalysisException {
+    Preconditions.checkState(highestAncestorDistance > 0);
+
+    final List<String> rawPath = slotPath.getRawPath();
+    final int topStructPathLen = rawPath.size() - highestAncestorDistance;
+    Preconditions.checkState(topStructPathLen > 0);
+    final Path topStructPath = new Path(slotPath.getRootDesc(),
+        rawPath.subList(0, topStructPathLen));
+    Path topStructResolvedPath = null;
+    try {
+      topStructResolvedPath = resolvePathWithMasking(topStructPath.getRawPath(),
+          PathType.SLOT_REF);
+    } catch (TableLoadingException e) {
+      // Should never happen because we only check registered table aliases.
+      Preconditions.checkState(false);
     }
+    Preconditions.checkState(topStructResolvedPath != null);
+    Preconditions.checkState(topStructResolvedPath.isResolved());
+    List<String> topStructKey = topStructResolvedPath.getFullyQualifiedRawPath();
+    SlotDescriptor topStructSlotDesc = slotPathMap_.get(topStructKey);
+
+    // Structs should already be registered when their members are registered.
+    Preconditions.checkNotNull(topStructSlotDesc);
+
+    List<String> remainingPath = rawPath.subList(topStructPathLen, rawPath.size());
+    SlotDescriptor childSlotRef = findChildSlotDesc(topStructSlotDesc, remainingPath);
+    Preconditions.checkState(childSlotRef != null);
+    return childSlotRef;
+  }
+
+  private SlotDescriptor createAndRegisterSlotDesc(Path slotPath,
+      boolean insertIntoSlotPathMap) {
+    Preconditions.checkState(slotPath.isRootedAtTuple());
+    final SlotDescriptor result = addSlotDescriptor(slotPath.getRootDesc());
     result.setPath(slotPath);
-    slotPathMap_.put(key, result);
+    if (insertIntoSlotPathMap) {
+      slotPathMap_.put(slotPath.getFullyQualifiedRawPath(), result);
+    }
     registerColumnPrivReq(result);
     return result;
   }
 
+  private int getHighestLevelAncestorDistance(Path slotPath) {
+    Optional<Integer> greatestDistanceAncestor = slotPathMap_.entrySet().stream()
+      .filter(entry -> entry.getValue().getType().isStructType()) // only structs
+      .map(entry -> entry.getKey()) // take only the paths, not the SlotDescriptors
+      .map(parentPath -> Path.getRawPathWithoutPrefix(slotPath.getFullyQualifiedRawPath(),
+            parentPath)) // null for non-ancestors
+      .filter(remainingPath -> remainingPath != null) // only ancestors remain
+      .map(remainingPath -> remainingPath.size()) // distance from ancestor
+      .max(java.util.Comparator.naturalOrder());
+
+    if (greatestDistanceAncestor.isPresent()) {
+      return greatestDistanceAncestor.get();
+    } else {
+      return 0; // There is no ancestor, this is a top level path within the query.
+    }
+  }
+
+  public void createStructTuplesAndSlotDescs(SlotDescriptor desc)
+      throws AnalysisException {
+    Preconditions.checkState(desc.getType().isStructType());
+    TupleDescriptor structTuple = getDescTbl().createTupleDescriptor("struct_tuple");
+    Path slotPath = desc.getPath();
+    if (slotPath != null) structTuple.setPath(slotPath);
+    final StructType type = (StructType) desc.getType();
+    structTuple.setType(type);
+    structTuple.setParentSlotDesc(desc);
+    desc.setItemTupleDesc(structTuple);
+    for (StructField structField : type.getFields()) {
+      SlotDescriptor childDesc = getDescTbl().addSlotDescriptor(structTuple);
+      globalState_.blockBySlot.put(childDesc.getId(), this);
+      // 'slotPath' could be null e.g. when the query has an order by clause and
+      // this is the sorting tuple.
+      if (slotPath != null) {
+        Path relPath = Path.createRelPath(slotPath, structField.getName());
+        relPath.resolve();
+        childDesc.setPath(relPath);
+        registerColumnPrivReq(childDesc);
+        slotPathMap_.putIfAbsent(relPath.getFullyQualifiedRawPath(), childDesc);
+      }
+      childDesc.setType(structField.getType());
+
+      if (childDesc.getType().isStructType()) {
+        createStructTuplesAndSlotDescs(childDesc);
+      }
+    }
+  }
+
+  private SlotDescriptor findChildSlotDesc(SlotDescriptor parent, List<String> path) {
+    if (path.isEmpty()) return parent;
+
+    final TupleDescriptor tuple = parent.getItemTupleDesc();
+    if (tuple == null) return null;
+
+    final int parentPathLen = parent.getPath().getRawPath().size();
+    SlotDescriptor childSlotDesc = null;
+    for (SlotDescriptor desc : tuple.getSlots()) {
+      if (desc.getPath().getRawPath().get(parentPathLen).equals(path.get(0))) {
+        childSlotDesc = desc;
+        break;
+      }
+    }
+
+    if (childSlotDesc == null) return null;
+    return findChildSlotDesc(childSlotDesc, path.subList(1, path.size()));
+  }
+
   /**
    * Registers an array and its descendants.
    * Creates a CollectionTableRef for all collections on the path.
@@ -1502,14 +1682,6 @@ public class Analyzer {
     return desc;
   }
 
-  private SlotDescriptor registerNewSlotRef(Path slotPath) throws AnalysisException {
-    Preconditions.checkState(slotPath.isRootedAtTuple());
-    SlotDescriptor result = addSlotDescriptor(slotPath.getRootDesc());
-    result.setPath(slotPath);
-    registerColumnPrivReq(result);
-    return result;
-  }
-
   /**
    * Registers a column-level privilege request if 'slotDesc' directly or indirectly
    * refers to a table column. It handles both scalar and complex-typed columns.
@@ -2556,7 +2728,7 @@ public class Analyzer {
       // columns because mappings to non-partition columns do not provide
       // a performance benefit.
       SlotId srcSid = srcSids.get(0);
-      for (SlotDescriptor destSlot: destTupleDesc.getSlots()) {
+      for (SlotDescriptor destSlot: destTupleDesc.getSlotsRecursively()) {
         if (ignoreSlots.contains(destSlot.getId())) continue;
         if (hasValueTransfer(srcSid, destSlot.getId())) {
           allDestSids.add(Lists.newArrayList(destSlot.getId()));
@@ -2578,7 +2750,7 @@ public class Analyzer {
       // The limitations are show in predicate-propagation.test.
       List<SlotId> destSids = new ArrayList<>();
       for (SlotId srcSid: srcSids) {
-        for (SlotDescriptor destSlot: destTupleDesc.getSlots()) {
+        for (SlotDescriptor destSlot: destTupleDesc.getSlotsRecursively()) {
           if (ignoreSlots.contains(destSlot.getId())) continue;
           if (hasValueTransfer(srcSid, destSlot.getId())
               && !destSids.contains(destSlot.getId())) {
diff --git a/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java b/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java
index f22bb2609..efacd578b 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java
@@ -130,8 +130,8 @@ public class CollectionTableRef extends TableRef {
         // substituted in SelectStmt.resolveInlineViewRefs()
         // TODO: currently we cannot use the same array twice (e.g. self join) in this
         //       case
-        SlotDescriptor parentSlotDesc =
-            analyzer.getSlotDescriptor(resolvedPath_.toString());
+        SlotDescriptor parentSlotDesc = analyzer.getSlotDescriptor(
+            resolvedPath_.getFullyQualifiedRawPath());
         collectionExpr_ = new SlotRef(parentSlotDesc);
         collectionExpr_ =
             collectionExpr_.trySubstitute(sourceView.getBaseTblSmap(), analyzer, true);
diff --git a/fe/src/main/java/org/apache/impala/analysis/DescriptorTable.java b/fe/src/main/java/org/apache/impala/analysis/DescriptorTable.java
index 783a3a746..399eddac5 100644
--- a/fe/src/main/java/org/apache/impala/analysis/DescriptorTable.java
+++ b/fe/src/main/java/org/apache/impala/analysis/DescriptorTable.java
@@ -141,24 +141,23 @@ public class DescriptorTable {
       SlotDescriptor slotDesc = getSlotDesc(id);
       if (slotDesc.isMaterialized()) continue;
       slotDesc.setIsMaterialized(true);
+      // If we are inside a struct, we need to materialise all enclosing struct
+      // SlotDescriptors, otherwise these descriptors will be hanging in the air.
+      materializeParentStructSlots(slotDesc);
       // Don't add the TupleDescriptor that is for struct children.
       if (slotDesc.getParent().getParentSlotDesc() == null) {
         affectedTuples.add(slotDesc.getParent());
       }
-      if (slotDesc.getType().isStructType()) {
-        TupleDescriptor childrenTuple = slotDesc.getItemTupleDesc();
-        Preconditions.checkNotNull(childrenTuple);
-        Preconditions.checkState(childrenTuple.getSlots().size() > 0);
-        List<SlotId> childrenIds = Lists.newArrayList();
-        for (SlotDescriptor childSlot : childrenTuple.getSlots()) {
-          childrenIds.add(childSlot.getId());
-        }
-        markSlotsMaterialized(childrenIds);
-      }
     }
     return affectedTuples;
   }
 
+  private void materializeParentStructSlots(SlotDescriptor desc) {
+    for (SlotDescriptor slotDesc : desc.getEnclosingStructSlotDescs()) {
+      slotDesc.setIsMaterialized(true);
+    }
+  }
+
   /**
    * Computes physical layout parameters of all descriptors.
    * Call this only after the last descriptor was added.
diff --git a/fe/src/main/java/org/apache/impala/analysis/Expr.java b/fe/src/main/java/org/apache/impala/analysis/Expr.java
index fe11c2b37..3401aed3f 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Expr.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Expr.java
@@ -1142,14 +1142,16 @@ abstract public class Expr extends TreeNode<Expr> implements ParseNode, Cloneabl
       Expr substExpr = smap.get(this);
       if (substExpr != null) return substExpr.clone();
     }
+    substituteImplOnChildren(smap, analyzer);
+    resetAnalysisState();
+    return this;
+  }
+
+  protected final void substituteImplOnChildren(ExprSubstitutionMap smap,
+      Analyzer analyzer) {
     for (int i = 0; i < children_.size(); ++i) {
       children_.set(i, children_.get(i).substituteImpl(smap, analyzer));
     }
-    // SlotRefs must remain analyzed to support substitution across query blocks. All
-    // other exprs must be analyzed again after the substitution to add implicit casts
-    // and for resolving their correct function signature.
-    if (!(this instanceof SlotRef)) resetAnalysisState();
-    return this;
   }
 
   /**
diff --git a/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java b/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java
index 6d2fe9c36..74262490c 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ExprSubstitutionMap.java
@@ -232,10 +232,16 @@ public final class ExprSubstitutionMap {
    * Remove expressions that are not used according to baseTblSMap
    */
   public void trim(ExprSubstitutionMap baseTblSMap, Analyzer analyzer) {
-      Preconditions.checkState(size() == baseTblSMap.size());
-      for (int i = size() - 1; i >= 0; --i) {
-        List<SlotId> slotIds = new ArrayList<>();
-        baseTblSMap.getRhs().get(i).getIds(null, slotIds);
+    Preconditions.checkState(size() == baseTblSMap.size());
+    for (int i = size() - 1; i >= 0; --i) {
+      final Expr baseTblExpr = baseTblSMap.getRhs().get(i);
+      List<SlotId> slotIds = new ArrayList<>();
+      baseTblExpr.getIds(null, slotIds);
+      // Struct children are allowed to be non-materialised because the query may only
+      // concern a subset of the fields of the struct. In this case we do not remove the
+      // entry from this 'ExprSubstitutionMap', only if none of the children are
+      // materialised.
+      if (!baseTblExpr.getType().isStructType()) {
         for (SlotId id: slotIds) {
           if (!analyzer.getSlotDesc(id).isMaterialized()) {
             lhs_.remove(i);
@@ -243,6 +249,19 @@ public final class ExprSubstitutionMap {
             break;
           }
         }
+      } else { // If it is a struct, remove iff none of the children are materialised.
+        boolean foundMaterialized = false;
+        for (SlotId id: slotIds) {
+          if (analyzer.getSlotDesc(id).isMaterialized()) {
+            foundMaterialized = true;
+            break;
+          }
+        }
+        if (!foundMaterialized) {
+          lhs_.remove(i);
+          rhs_.remove(i);
+        }
       }
+    }
   }
 }
diff --git a/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java b/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java
index a34c21aaf..a91070e85 100644
--- a/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java
@@ -20,7 +20,6 @@ package org.apache.impala.analysis;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Set;
-import java.util.stream.Collectors;
 
 import org.apache.impala.authorization.AuthorizationContext;
 import org.apache.impala.authorization.TableMask;
@@ -32,6 +31,7 @@ import org.apache.impala.catalog.StructField;
 import org.apache.impala.catalog.StructType;
 import org.apache.impala.common.AnalysisException;
 import org.apache.impala.common.InternalException;
+import org.apache.impala.common.Pair;
 import org.apache.impala.rewrite.ExprRewriter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -325,6 +325,89 @@ public class InlineViewRef extends TableRef {
         }
       }
     }
+
+    if (colExpr.getType().isStructType()) {
+      Preconditions.checkState(colExpr instanceof SlotRef);
+      Preconditions.checkState(baseTableExpr instanceof SlotRef);
+      putStructMembersIntoSmap(smap_, slotDesc, (SlotRef) colExpr);
+      putStructMembersIntoSmap(baseTblSmap_, slotDesc, (SlotRef) baseTableExpr);
+      createAuxPredicatesForStructMembers(analyzer, slotDesc, (SlotRef) colExpr);
+    }
+  }
+
+  // Puts the fields of 'rhsStruct' into 'smap' as right hand side (mapped) values,
+  // recursively. The keys (left hand side values) are constructed based on the
+  // corresponding slot descriptors in the itemTupleDesc of 'lhsSlotDesc'.
+  private void putStructMembersIntoSmap(ExprSubstitutionMap smap,
+      SlotDescriptor lhsSlotDesc, SlotRef rhsStruct) {
+    for (Pair<SlotDescriptor, SlotRef> pair :
+        getStructSlotDescSlotRefPairs(lhsSlotDesc, rhsStruct)) {
+      SlotDescriptor lhs = pair.first;
+      SlotRef rhs = pair.second;
+      smap.put(new SlotRef(lhs), rhs);
+    }
+  }
+
+  private void createAuxPredicatesForStructMembers(Analyzer analyzer,
+      SlotDescriptor structSlotDesc, SlotRef structExpr) {
+    for (Pair<SlotDescriptor, SlotRef> pair :
+        getStructSlotDescSlotRefPairs(structSlotDesc, structExpr)) {
+      SlotDescriptor structMemberSlotDesc = pair.first;
+      SlotRef structMemberExpr = pair.second;
+
+      if (createAuxPredicate(structMemberExpr)) {
+        analyzer.createAuxEqPredicate(
+            new SlotRef(structMemberSlotDesc), structMemberExpr.clone());
+      }
+    }
+  }
+
+  /**
+   * Given a slot desc and a SlotRef expression, both referring to the same struct,
+   * returns a list of corresponding SlotDescriptor/SlotRef pairs of the struct members,
+   * recursively.
+   */
+  private List<Pair<SlotDescriptor, SlotRef>> getStructSlotDescSlotRefPairs(
+      SlotDescriptor structSlotDesc, SlotRef structExpr) {
+    Preconditions.checkState(structSlotDesc.getType().isStructType());
+    Preconditions.checkState(structExpr.getType().isStructType());
+    Preconditions.checkState(structSlotDesc.getType().equals(structExpr.getType()));
+
+    List<Pair<SlotDescriptor, SlotRef>> result = new ArrayList<>();
+
+    TupleDescriptor lhsItemTupleDesc = structSlotDesc.getItemTupleDesc();
+    Preconditions.checkNotNull(lhsItemTupleDesc);
+    List<SlotDescriptor> lhsChildSlotDescs = lhsItemTupleDesc.getSlots();
+    Preconditions.checkState(
+        lhsChildSlotDescs.size() == structExpr.getChildren().size());
+    for (int i = 0; i < lhsChildSlotDescs.size(); i++) {
+      SlotDescriptor lhsChildSlotDesc = lhsChildSlotDescs.get(i);
+
+      Expr rhsChild = structExpr.getChildren().get(i);
+      Preconditions.checkState(rhsChild instanceof SlotRef);
+      SlotRef rhsChildSlotRef = (SlotRef) rhsChild;
+
+      List<String> lhsRawPath = lhsChildSlotDesc.getPath().getRawPath();
+      Path rhsPath = rhsChildSlotRef.getResolvedPath();
+
+      // The path can be null in the case of the sorting tuple.
+      if (rhsPath != null) {
+        List<String> rhsRawPath = rhsPath.getRawPath();
+
+        // Check that the children come in the same order on both lhs and rhs. If not, the
+        // last part of the paths would be different.
+        Preconditions.checkState(lhsRawPath.get(lhsRawPath.size() - 1)
+            .equals(rhsRawPath.get(rhsRawPath.size() - 1)));
+
+        result.add(new Pair(lhsChildSlotDesc, rhsChildSlotRef));
+
+        if (rhsChildSlotRef.getType().isStructType()) {
+          result.addAll(
+              getStructSlotDescSlotRefPairs(lhsChildSlotDesc, rhsChildSlotRef));
+        }
+      }
+    }
+    return result;
   }
 
   /**
diff --git a/fe/src/main/java/org/apache/impala/analysis/Path.java b/fe/src/main/java/org/apache/impala/analysis/Path.java
index 489e9a855..6e3fd2d3b 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Path.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Path.java
@@ -465,6 +465,26 @@ public class Path {
     return absolutePath_;
   }
 
+  /**
+   * If 'prefix' is a prefix of this path, returns a list with the elements of this path
+   * that would remain after removing the prefix; otherwise returns null.
+   *
+   * Uses 'getFullyQualifiedRawPath()' to obtain the elements
+   * of the paths.
+   */
+  public List<String> getRawPathWithoutPrefix(Path prefix) {
+    final List<String> prefixPath = prefix.getFullyQualifiedRawPath();
+    final List<String> thisPath = this.getFullyQualifiedRawPath();
+    return getRawPathWithoutPrefix(thisPath, prefixPath);
+  }
+
+  public static List<String> getRawPathWithoutPrefix(List<String> path,
+      List<String> prefix) {
+    if (prefix.size() > path.size()) return null;
+    if (!prefix.equals(path.subList(0, prefix.size()))) return null;
+    return path.subList(prefix.size(), path.size());
+  }
+
   /**
    * Converts table schema path to file schema path. Well, it's actually somewhere between
    * the two because the first column is offsetted with the number of partitions.
diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
index 409235933..226c09e6d 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
@@ -19,11 +19,15 @@ package org.apache.impala.analysis;
 
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.stream.Stream;
+import java.util.stream.Collectors;
 
 import org.apache.impala.analysis.Path.PathType;
 import org.apache.impala.analysis.TableRef.ZippingUnnestType;
@@ -284,6 +288,8 @@ public class SelectStmt extends QueryStmt {
       // Start out with table refs to establish aliases.
       fromClause_.analyze(analyzer_);
 
+      registerStructSlotRefPathsWithAnalyzer();
+
       analyzeSelectClause();
       verifyResultExprs();
       registerViewColumnPrivileges();
@@ -324,6 +330,76 @@ public class SelectStmt extends QueryStmt {
       analyzer_.setSimpleLimitStatus(checkSimpleLimitStmt());
     }
 
+    /* Register paths that point to structs. This is to unify expressions and allow
+     * embedded (struct member) expressions to share the tuple memory of their enclosing
+     * expressions.
+     */
+    private void registerStructSlotRefPathsWithAnalyzer() throws AnalysisException {
+      Preconditions.checkNotNull(selectList_);
+      // Note: if we in the future include complex types in star expressions, we will have
+      // to expand star expressions here.
+      Stream<Expr> selectListExprs = selectList_.getItems().stream()
+        .filter(elem -> !elem.isStar())
+        .map(elem -> elem.getExpr());
+      Stream<Expr> nonSelectListExprs = collectExprsOutsideSelectList();
+
+      Stream<Expr> exprs = Stream.concat(selectListExprs, nonSelectListExprs);
+
+      HashSet<SlotRef> slotRefs = new HashSet<>();
+      exprs.forEach(expr -> expr.collect(SlotRef.class, slotRefs));
+
+      List<Path> paths = slotRefs.stream().map(this::slotRefToResolvedPath)
+          .filter(path -> path != null)
+          .filter(path -> path.destType().isStructType())
+          .collect(Collectors.toList());
+      // Sort paths by length in descending order so that structs that contain other
+      // structs come before their children.
+      Collections.sort(paths,
+          Comparator.<Path>comparingInt(path -> path.getMatchedTypes().size())
+          .reversed());
+      for (Path p : paths) {
+        analyzer_.registerSlotRef(p);
+      }
+    }
+
+    private Stream<Expr> collectExprsOutsideSelectList() {
+      Stream<Expr> res = Stream.empty();
+      if (whereClause_ != null) {
+        res = Stream.concat(res, Stream.of(whereClause_));
+      }
+      if (havingClause_ != null) {
+        res = Stream.concat(res, Stream.of(havingClause_));
+      }
+      if (groupingExprs_ != null) {
+        res = Stream.concat(res, groupingExprs_.stream());
+      }
+      if (sortInfo_ != null) {
+        res = Stream.concat(res, sortInfo_.getSortExprs().stream());
+      }
+      if (analyticInfo_ != null) {
+        res = Stream.concat(res,
+            analyticInfo_.getAnalyticExprs().stream()
+            .map(analyticExpr -> (Expr) analyticExpr));
+      }
+      return res;
+    }
+
+    private Path slotRefToResolvedPath(SlotRef slotRef) {
+      try {
+        Path resolvedPath = analyzer_.resolvePathWithMasking(slotRef.getRawPath(),
+            PathType.SLOT_REF);
+        return resolvedPath;
+      } catch (TableLoadingException e) {
+        // Should never happen because we only check registered table aliases.
+        Preconditions.checkState(false);
+        return null;
+      } catch (AnalysisException e) {
+        // Return null if analysis did not succeed. This means this path will not be
+        // registered here.
+        return null;
+      }
+    }
+
     private void analyzeSelectClause() throws AnalysisException {
       // Generate !empty() predicates to filter out empty collections.
       // Skip this step when analyzing a WITH-clause because CollectionTableRefs
diff --git a/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java b/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java
index 8b6622883..8cd50b174 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java
@@ -190,6 +190,66 @@ public class SlotDescriptor {
     return stats_;
   }
 
+  private void getEnclosingStructSlotAndTupleDescs(List<SlotDescriptor> slotDescs,
+      List<TupleDescriptor> tupleDescs) {
+    TupleDescriptor tupleDesc = getParent();
+    while (tupleDesc != null) {
+      if (tupleDescs != null) tupleDescs.add(tupleDesc);
+
+      final SlotDescriptor parentStructSlotDesc = tupleDesc.getParentSlotDesc();
+      if (parentStructSlotDesc != null && slotDescs != null) {
+        slotDescs.add(parentStructSlotDesc);
+      }
+
+      tupleDesc = parentStructSlotDesc != null ? parentStructSlotDesc.getParent() : null;
+    }
+  }
+
+  /**
+   * Returns the slot descs of the structs that contain this slot desc, recursively.
+   * For an example struct 'outer: <middle: <inner: <i: int>>>', called for the slot desc
+   * of 'i', the returned list will contain the slot descs of 'inner', 'middle' and
+   * 'outer'.
+   */
+  public List<SlotDescriptor> getEnclosingStructSlotDescs() {
+    List<SlotDescriptor> result = new ArrayList<>();
+    getEnclosingStructSlotAndTupleDescs(result, null);
+    return result;
+  }
+
+  /**
+   * Returns the tuple descs enclosing this slot desc, recursively.
+   * For an example struct 'outer: <middle: <inner: <i: int>>>', called for the slot desc
+   * of 'i', the returned list will contain the 'itemTupleDesc_'s of 'inner', 'middle'
+   * and 'outer' as well as the tuple desc of the main tuple (the 'parent_' of the slot
+   * desc of 'outer').
+   */
+  public List<TupleDescriptor> getEnclosingTupleDescs() {
+    List<TupleDescriptor> result = new ArrayList<>();
+    getEnclosingStructSlotAndTupleDescs(null, result);
+    return result;
+  }
+
+  /**
+   * Returns the size of the slot without null indicators.
+   *
+   * Takes materialisation into account: returns 0 for non-materialised slots. This is
+   * most relevant for structs as it is possible that only a subset of the fields are
+   * materialised and the size of the struct varies according to this.
+   */
+  public int getMaterializedSlotSize() {
+    if (!isMaterialized()) return 0;
+    if (!getType().isStructType()) return getType().getSlotSize();
+
+    Preconditions.checkNotNull(itemTupleDesc_);
+    int size = 0;
+    for (SlotDescriptor d : itemTupleDesc_.getSlots()) {
+      size += d.getMaterializedSlotSize();
+    }
+
+    return size;
+  }
+
   /**
    * Checks if this descriptor describes  an array "pos" pseudo-column.
    *
diff --git a/fe/src/main/java/org/apache/impala/analysis/SlotRef.java b/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
index 27438b897..836109f63 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
@@ -181,7 +181,7 @@ public class SlotRef extends Expr {
       if (!(rootTable instanceof FeFsTable)) {
         throw new AnalysisException(String.format(
             "%s is not supported when querying STRUCT type %s",
-            rootTable.getClass().toString(), type_.toSql()));
+            rootTable, type_.toSql()));
       }
       FeFsTable feTable = (FeFsTable)rootTable;
       for (HdfsFileFormat format : feTable.getFileFormats()) {
@@ -191,21 +191,21 @@ public class SlotRef extends Expr {
         }
       }
     }
-    if (type_.isStructType()) expandSlotRefForStruct(analyzer);
+    if (type_.isStructType()) addStructChildrenAsSlotRefs(analyzer);
   }
 
-  // This function expects this SlotRef to be a Struct and creates SlotRefs to represent
-  // the children of the struct. Also creates slot and tuple descriptors for the children
-  // of the struct.
-  private void expandSlotRefForStruct(Analyzer analyzer) throws AnalysisException {
+  /**
+   * Re-expands the struct: recreates the item tuple descriptor of 'desc_' and the child
+   * 'SlotRef's, recursively.
+   * Expects this 'SlotRef' to be a struct.
+   */
+  public void reExpandStruct(Analyzer analyzer) throws AnalysisException {
     Preconditions.checkState(type_ != null && type_.isStructType());
-    // If the same struct is present multiple times in the select list we create only a
-    // single TupleDescriptor instead of one for each occurence.
-    if (desc_.getItemTupleDesc() == null) {
-      checkForUnsupportedFieldsForStruct();
-      createStructTuplesAndSlots(analyzer, resolvedPath_);
-    }
-    addStructChildrenAsSlotRefs(analyzer, desc_.getItemTupleDesc());
+    desc_.clearItemTupleDesc();
+    children_.clear();
+
+    analyzer.createStructTuplesAndSlotDescs(desc_);
+    addStructChildrenAsSlotRefs(analyzer);
   }
 
   // Expects the type of this SlotRef as a StructType. Throws an AnalysisException if any
@@ -213,58 +213,30 @@ public class SlotRef extends Expr {
   private void checkForUnsupportedFieldsForStruct() throws AnalysisException {
     Preconditions.checkState(type_ instanceof StructType);
     for (StructField structField : ((StructType)type_).getFields()) {
-      if (!structField.getType().isSupported()) {
+      final Type fieldType = structField.getType();
+      if (!fieldType.isSupported()) {
         throw new AnalysisException("Unsupported type '"
-            + structField.getType().toSql() + "' in '" + toSql() + "'.");
+            + fieldType.toSql() + "' in '" + toSql() + "'.");
       }
-      if (structField.getType().isCollectionType()) {
+      if (fieldType.isCollectionType()) {
         throw new AnalysisException("Struct containing a collection type is not " +
             "allowed in the select list.");
       }
     }
   }
 
-  /**
-   * Creates a TupleDescriptor to hold the children of a struct slot and then creates and
-   * adds SlotDescriptors as struct children to this TupleDescriptor. Sets the created
-   * parent TupleDescriptor to 'desc_.itemTupleDesc_'.
-   */
-  public void createStructTuplesAndSlots(Analyzer analyzer, Path resolvedPath) {
-    TupleDescriptor structTuple =
-        analyzer.getDescTbl().createTupleDescriptor("struct_tuple");
-    if (resolvedPath != null) structTuple.setPath(resolvedPath);
-    structTuple.setType((StructType)type_);
-    structTuple.setParentSlotDesc(desc_);
-    for (StructField structField : ((StructType)type_).getFields()) {
-      SlotDescriptor slotDesc = analyzer.getDescTbl().addSlotDescriptor(structTuple);
-      // 'resolvedPath_' could be null e.g. when the query has an order by clause and
-      // this is the sorting tuple.
-      if (resolvedPath != null) {
-        Path relPath = Path.createRelPath(resolvedPath, structField.getName());
-        relPath.resolve();
-        slotDesc.setPath(relPath);
-      }
-      slotDesc.setType(structField.getType());
-      slotDesc.setIsMaterialized(true);
-    }
-    desc_.setItemTupleDesc(structTuple);
-  }
-
-  /**
-   * Assuming that 'structTuple' is the tuple for struct children this function iterates
-   * its slots, creates a SlotRef for each slot and adds them to 'children_' of this
-   * SlotRef.
-   */
-  public void addStructChildrenAsSlotRefs(Analyzer analyzer,
-      TupleDescriptor structTuple) throws AnalysisException {
+  // Assumes this 'SlotRef' is a struct and that desc_.itemTupleDesc_ has already been
+  // filled. Creates the children 'SlotRef's for the struct recursively.
+  private void addStructChildrenAsSlotRefs(Analyzer analyzer) throws AnalysisException {
+    Preconditions.checkState(desc_.getType().isStructType());
+    checkForUnsupportedFieldsForStruct();
+    TupleDescriptor structTuple = desc_.getItemTupleDesc();
     Preconditions.checkState(structTuple != null);
-    Preconditions.checkState(structTuple.getParentSlotDesc() != null);
-    Preconditions.checkState(structTuple.getParentSlotDesc().getType().isStructType());
     for (SlotDescriptor childSlot : structTuple.getSlots()) {
       SlotRef childSlotRef = new SlotRef(childSlot);
       children_.add(childSlotRef);
       if (childSlot.getType().isStructType()) {
-        childSlotRef.expandSlotRefForStruct(analyzer);
+        childSlotRef.addStructChildrenAsSlotRefs(analyzer);
       }
     }
   }
@@ -288,12 +260,16 @@ public class SlotRef extends Expr {
   @Override
   protected boolean isConstantImpl() { return false; }
 
+  public boolean hasDesc() { return desc_ != null; }
+
   public SlotDescriptor getDesc() {
     Preconditions.checkState(isAnalyzed());
     Preconditions.checkNotNull(desc_);
     return desc_;
   }
 
+  public List<String> getRawPath() { return rawPath_; }
+
   public SlotId getSlotId() {
     Preconditions.checkState(isAnalyzed());
     Preconditions.checkNotNull(desc_);
@@ -370,6 +346,19 @@ public class SlotRef extends Expr {
     public boolean matches(SlotRef a, SlotRef b) { return a.localEquals(b); }
   };
 
+  @Override
+  protected Expr substituteImpl(ExprSubstitutionMap smap, Analyzer analyzer) {
+    if (smap != null) {
+      Expr substExpr = smap.get(this);
+      if (substExpr != null) return substExpr.clone();
+    }
+
+    // SlotRefs must remain analyzed to support substitution across query blocks,
+    // therefore we do not call resetAnalysisState().
+    substituteImplOnChildren(smap, analyzer);
+    return this;
+  }
+
   @Override
   public boolean isBoundByTupleIds(List<TupleId> tids) {
     Preconditions.checkState(desc_ != null);
@@ -382,8 +371,8 @@ public class SlotRef extends Expr {
         if (tid.equals(parentId)) return true;
       }
     }
-    for (TupleId tid: tids) {
-      if (tid.equals(desc_.getParent().getId())) return true;
+    for (TupleDescriptor enclosingTupleDesc : desc_.getEnclosingTupleDescs()) {
+      if (tids.contains(enclosingTupleDesc.getId())) return true;
     }
     return false;
   }
@@ -391,7 +380,11 @@ public class SlotRef extends Expr {
   @Override
   public boolean isBoundBySlotIds(List<SlotId> slotIds) {
     Preconditions.checkState(isAnalyzed());
-    return slotIds.contains(desc_.getId());
+    if (slotIds.contains(desc_.getId())) return true;
+    for (SlotDescriptor enclosingSlotDesc : desc_.getEnclosingStructSlotDescs()) {
+      if (slotIds.contains(enclosingSlotDesc.getId())) return true;
+    }
+    return false;
   }
 
   @Override
@@ -400,6 +393,14 @@ public class SlotRef extends Expr {
     Preconditions.checkState(desc_ != null);
     if (slotIds != null) slotIds.add(desc_.getId());
     if (tupleIds != null) tupleIds.add(desc_.getParent().getId());
+
+    // If we are a struct, we need to add all fields recursively so they are materialised.
+    if (desc_.getType().isStructType() && slotIds != null) {
+      TupleDescriptor itemTupleDesc = desc_.getItemTupleDesc();
+      Preconditions.checkState(itemTupleDesc != null);
+      itemTupleDesc.getSlotsRecursively().stream()
+        .forEach(slotDesc -> slotIds.add(slotDesc.getId()));
+    }
   }
 
   @Override
diff --git a/fe/src/main/java/org/apache/impala/analysis/SortInfo.java b/fe/src/main/java/org/apache/impala/analysis/SortInfo.java
index 8d96f729d..995c686eb 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SortInfo.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SortInfo.java
@@ -258,10 +258,8 @@ public class SortInfo {
       SlotRef dstExpr = new SlotRef(dstSlotDesc);
       if (dstSlotDesc.getType().isStructType() &&
           dstSlotDesc.getItemTupleDesc() != null) {
-        dstSlotDesc.clearItemTupleDesc();
-        dstExpr.createStructTuplesAndSlots(analyzer, null);
         try {
-          dstExpr.addStructChildrenAsSlotRefs(analyzer, dstSlotDesc.getItemTupleDesc());
+          dstExpr.reExpandStruct(analyzer);
         } catch (AnalysisException ex) {
           // Adding SlotRefs shouldn't throw here as the source SlotRef had already been
           // analysed.
diff --git a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java b/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
index abcce3a1d..d5c42653f 100644
--- a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
+++ b/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
@@ -135,6 +135,22 @@ public class TupleDescriptor {
   public TupleId getId() { return id_; }
   public List<SlotDescriptor> getSlots() { return slots_; }
 
+  /**
+   * Returns the slots in this 'TupleDescriptor' and if any slot is a struct slot, returns
+   * their slots as well, recursively. Does not descend into collections, only structs.
+   */
+  public List<SlotDescriptor> getSlotsRecursively() {
+    List<SlotDescriptor> res = new ArrayList();
+    for (SlotDescriptor slotDesc : slots_) {
+      res.add(slotDesc);
+      TupleDescriptor itemTupleDesc = slotDesc.getItemTupleDesc();
+      if (slotDesc.getType().isStructType() && itemTupleDesc != null) {
+        res.addAll(itemTupleDesc.getSlotsRecursively());
+      }
+    }
+    return res;
+  }
+
   public boolean hasMaterializedSlots() {
     for (SlotDescriptor slot: slots_) {
       if (slot.isMaterialized()) return true;
@@ -439,7 +455,7 @@ public class TupleDescriptor {
    * Receives a SlotDescriptor as a parameter and returns its size.
    */
   private int getSlotSize(SlotDescriptor slotDesc) {
-    int slotSize = slotDesc.getType().getSlotSize();
+    int slotSize = slotDesc.getMaterializedSlotSize();
     // Add padding for a KUDU string slot.
     if (slotDesc.isKuduStringSlot()) {
       slotSize += KUDU_STRING_PADDING;
@@ -459,7 +475,7 @@ public class TupleDescriptor {
       // Note, there are no stats for complex types slots so can't use average serialized
       // size from stats for them.
       // TODO: for computed slots, try to come up with stats estimates
-      avgSerializedSize_ += slotDesc.getType().getSlotSize();
+      avgSerializedSize_ += slotDesc.getMaterializedSlotSize();
     }
     // Add padding for a KUDU string slot.
     if (slotDesc.isKuduStringSlot()) {
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java
index 3ea938ce6..832a198cc 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzerTest.java
@@ -258,7 +258,8 @@ public class AnalyzerTest extends FrontendTestBase {
 
   private void checkLayoutParams(String colAlias, int byteSize, int byteOffset,
       int nullIndicatorByte, int nullIndicatorBit, Analyzer analyzer) {
-    SlotDescriptor d = analyzer.getSlotDescriptor(colAlias);
+    List<String> colAliasRawPath = Arrays.asList(colAlias.split("\\."));
+    SlotDescriptor d = analyzer.getSlotDescriptor(colAliasRawPath);
     checkLayoutParams(d, byteSize, byteOffset, nullIndicatorByte, nullIndicatorBit);
   }
 
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index acc3ab7a4..fd5af1e82 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -35,6 +35,7 @@ import org.apache.impala.testutil.TestUtils.IgnoreValueFilter;
 import org.apache.impala.thrift.TRuntimeFilterType;
 import org.apache.impala.thrift.TExecRequest;
 import org.apache.impala.thrift.TExplainLevel;
+import org.apache.impala.thrift.TExplainResult;
 import org.apache.impala.thrift.TJoinDistributionMode;
 import org.apache.impala.thrift.TKuduReplicaSelection;
 import org.apache.impala.thrift.TQueryCtx;
@@ -791,6 +792,83 @@ public class PlannerTest extends PlannerTestBase {
     Assert.assertEquals(actualMtDop, expectedMtDop);
   }
 
+  @Test
+  public void testOnlyNeededStructFieldsMaterialized() throws ImpalaException {
+    // Tests that if a struct is selected in an inline view but only a subset of its
+    // fields are selected in the top level query, only the selected fields are
+    // materialised, not the whole struct.
+    // Also tests that selecting the whole struct or fields from an inline view or
+    // directly from the table give the same row size.
+
+    // For comlex types in the select list, we have to turn codegen off.
+    TQueryOptions queryOpts = defaultQueryOptions();
+    queryOpts.setDisable_codegen(true);
+
+    String queryWholeStruct =
+        "select outer_struct from functional_orc_def.complextypes_nested_structs";
+    int rowSizeWholeStruct = getRowSize(queryWholeStruct, queryOpts);
+
+    String queryWholeStructFromInlineView =
+        "with sub as (" +
+          "select id, outer_struct from functional_orc_def.complextypes_nested_structs)" +
+        "select sub.outer_struct from sub";
+    int rowSizeWholeStructFromInlineView = getRowSize(
+        queryWholeStructFromInlineView, queryOpts);
+
+    String queryOneField =
+        "select outer_struct.str from functional_orc_def.complextypes_nested_structs";
+    int rowSizeOneField = getRowSize(queryOneField, queryOpts);
+
+    String queryOneFieldFromInlineView =
+        "with sub as (" +
+          "select id, outer_struct from functional_orc_def.complextypes_nested_structs)" +
+        "select sub.outer_struct.str from sub";
+    int rowSizeOneFieldFromInlineView = getRowSize(
+        queryOneFieldFromInlineView, queryOpts);
+
+    String queryTwoFields =
+        "select outer_struct.str, outer_struct.inner_struct1 " +
+        "from functional_orc_def.complextypes_nested_structs";
+    int rowSizeTwoFields = getRowSize(queryTwoFields, queryOpts);
+
+    String queryTwoFieldsFromInlineView =
+        "with sub as (" +
+          "select id, outer_struct from functional_orc_def.complextypes_nested_structs)" +
+        "select sub.outer_struct.str, sub.outer_struct.inner_struct1 from sub";
+    int rowSizeTwoFieldsFromInlineView = getRowSize(
+        queryTwoFieldsFromInlineView, queryOpts);
+
+    Assert.assertEquals(rowSizeWholeStruct, rowSizeWholeStructFromInlineView);
+    Assert.assertEquals(rowSizeOneField, rowSizeOneFieldFromInlineView);
+    Assert.assertEquals(rowSizeTwoFields, rowSizeTwoFieldsFromInlineView);
+
+    Assert.assertTrue(rowSizeOneField < rowSizeTwoFields);
+    Assert.assertTrue(rowSizeTwoFields < rowSizeWholeStruct);
+  }
+
+  @Test
+  public void testStructFieldSlotSharedWithStruct() throws ImpalaException {
+    // Tests that in the case where a struct and one of its fields are both present in the
+    // select list, no extra slot is generated in the row for the struct field but the
+    // memory of the struct is reused, i.e. the row size is the same as when only the
+    // struct is queried.
+
+    // For comlex types in the select list, we have to turn codegen off.
+    TQueryOptions queryOpts = defaultQueryOptions();
+    queryOpts.setDisable_codegen(true);
+
+    String queryWithoutField =
+        "select id, outer_struct from functional_orc_def.complextypes_nested_structs";
+    int rowSizeWithoutField = getRowSize(queryWithoutField, queryOpts);
+
+    String queryWithField =
+        "select id, outer_struct, outer_struct.str " +
+        "from functional_orc_def.complextypes_nested_structs";
+    int rowSizeWithField = getRowSize(queryWithField, queryOpts);
+
+    Assert.assertEquals(rowSizeWithoutField, rowSizeWithField);
+  }
+
   @Test
   public void testResourceRequirements() {
     // Tests the resource requirement computation from the planner.
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java b/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
index f95548072..7974c48d4 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
@@ -18,6 +18,7 @@
 package org.apache.impala.planner;
 
 import static org.junit.Assert.fail;
+import org.junit.Assert;
 
 import java.io.FileWriter;
 import java.io.IOException;
@@ -803,6 +804,30 @@ public class PlannerTestBase extends FrontendTestBase {
     return builder.toString();
   }
 
+  protected int getRowSize(String query, TQueryOptions queryOptions) {
+    TQueryCtx queryCtx = TestUtils.createQueryContext(Catalog.DEFAULT_DB,
+        System.getProperty("user.name"));
+    queryCtx.client_request.setStmt(query);
+    PlanCtx planCtx = new PlanCtx(queryCtx);
+    queryCtx.client_request.query_options = queryOptions;
+
+    try {
+      TExecRequest execRequest = frontend_.createExecRequest(planCtx);
+    } catch (ImpalaException e) {
+      Assert.fail("Failed to create exec request for '" + query + "': " + e.getMessage());
+    }
+
+    String explainStr = planCtx.getExplainString();
+
+    Pattern rowSizePattern = Pattern.compile("row-size=([0-9]*)B");
+    Matcher m = rowSizePattern.matcher(explainStr);
+    boolean matchFound = m.find();
+    Assert.assertTrue("Row size not found in plan.", matchFound);
+    String rowSizeStr = m.group(1);
+    return Integer.valueOf(rowSizeStr);
+  }
+
+
   /**
    * Assorted binary options that alter the behaviour of planner tests, generally
    * enabling additional more-detailed checks.
diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-struct-in-select-list.test b/testdata/workloads/functional-query/queries/QueryTest/nested-struct-in-select-list.test
index e0b93a324..95676c5e3 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/nested-struct-in-select-list.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/nested-struct-in-select-list.test
@@ -119,6 +119,175 @@ select sub.id, sub.outer_struct from sub;
 INT,STRING
 ====
 ---- QUERY
+# WITH clause creates an inline view containing a nested struct and we query a nested
+# field.
+with sub as (
+    select id, outer_struct from functional_orc_def.complextypes_nested_structs)
+select sub.id, sub.outer_struct.inner_struct2 from sub;
+---- RESULTS
+1,'{"i":333222111,"str":"somestr3"}'
+2,'{"i":100,"str":"str3"}'
+3,'NULL'
+4,'{"i":1,"str":"string"}'
+5,'NULL'
+---- TYPES
+INT,STRING
+====
+---- QUERY
+# WITH clause creates an inline view containing a nested struct and we query a doubly
+# nested field.
+with sub as (
+    select id, outer_struct from functional_orc_def.complextypes_nested_structs)
+select sub.id, sub.outer_struct.inner_struct2.i from sub;
+---- RESULTS
+1,333222111
+2,100
+3,NULL
+4,1
+5,NULL
+---- TYPES
+INT,INT
+====
+---- QUERY
+# WITH clause creates an inline view containing a nested struct and one of its members
+# that is a struct itself; query both in the main select statement.
+with sub as (
+    select id, outer_struct, outer_struct.inner_struct3 inner3
+        from functional_orc_def.complextypes_nested_structs)
+select id, outer_struct, inner3 from sub;
+---- RESULTS
+1,'{"str":"somestr1","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":333222111,"str":"somestr3"},"inner_struct3":{"s":{"i":112288,"s":null}}}','{"s":{"i":112288,"s":null}}'
+2,'{"str":"str","inner_struct1":null,"inner_struct2":{"i":100,"str":"str3"},"inner_struct3":{"s":{"i":321,"s":"dfgs"}}}','{"s":{"i":321,"s":"dfgs"}}'
+3,'NULL','NULL'
+4,'{"str":"","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":1,"str":"string"},"inner_struct3":{"s":null}}','{"s":null}'
+5,'{"str":null,"inner_struct1":null,"inner_struct2":null,"inner_struct3":null}','NULL'
+---- TYPES
+INT,STRING,STRING
+====
+---- QUERY
+# WITH clause creates an inline view containing a nested struct ('outer_struct') and one
+# of its members that is a struct itself ('inner3'); query a different member of
+# 'outer_struct' as well as 'inner3' in the main select statement.
+with sub as (
+    select id, outer_struct, outer_struct.inner_struct3 inner3
+        from functional_orc_def.complextypes_nested_structs)
+select id, outer_struct.inner_struct2, inner3 from sub;
+---- RESULTS
+1,'{"i":333222111,"str":"somestr3"}','{"s":{"i":112288,"s":null}}'
+2,'{"i":100,"str":"str3"}','{"s":{"i":321,"s":"dfgs"}}'
+3,'NULL','NULL'
+4,'{"i":1,"str":"string"}','{"s":null}'
+5,'NULL','NULL'
+---- TYPES
+INT,STRING,STRING
+====
+---- QUERY
+# WITH clause creates an inline view containing a nested struct ('outer_struct') and one
+# of its members that is a struct itself ('inner3'); query a different member of
+# 'outer_struct' as well as a member of 'inner3' in the main select statement.
+with sub as (
+    select id, outer_struct, outer_struct.inner_struct3 inner3
+        from functional_orc_def.complextypes_nested_structs)
+select id, outer_struct.inner_struct2, inner3.s from sub;
+---- RESULTS
+1,'{"i":333222111,"str":"somestr3"}','{"i":112288,"s":null}'
+2,'{"i":100,"str":"str3"}','{"i":321,"s":"dfgs"}'
+3,'NULL','NULL'
+4,'{"i":1,"str":"string"}','NULL'
+5,'NULL','NULL'
+---- TYPES
+INT,STRING,STRING
+====
+---- QUERY
+# WITH clause creates an inline view containing a nested struct; we order by id.
+with sub as (
+    select id, outer_struct from functional_orc_def.complextypes_nested_structs)
+select sub.id, sub.outer_struct.inner_struct2 from sub order by sub.id desc;
+---- RESULTS
+5,'NULL'
+4,'{"i":1,"str":"string"}'
+3,'NULL'
+2,'{"i":100,"str":"str3"}'
+1,'{"i":333222111,"str":"somestr3"}'
+---- TYPES
+INT,STRING
+====
+---- QUERY
+# WITH clause creates an inline view containing a nested struct; we order by a nested
+# field.
+with sub as (
+    select id, outer_struct from functional_orc_def.complextypes_nested_structs)
+select sub.id, sub.outer_struct.inner_struct2 from sub
+order by sub.outer_struct.inner_struct2.i, sub.id;
+---- RESULTS
+4,'{"i":1,"str":"string"}'
+2,'{"i":100,"str":"str3"}'
+1,'{"i":333222111,"str":"somestr3"}'
+3,'NULL'
+5,'NULL'
+---- TYPES
+INT,STRING
+====
+---- QUERY
+# WITH clause creates an inline view containing a nested struct; we order by a nested
+# field that is not present in the select list.
+with sub as (
+    select id, outer_struct from functional_orc_def.complextypes_nested_structs)
+select sub.id, sub.outer_struct.inner_struct1 from sub
+order by sub.outer_struct.inner_struct2.i, sub.id;
+---- RESULTS
+4,'{"str":"somestr2","de":12345.12}'
+2,'NULL'
+1,'{"str":"somestr2","de":12345.12}'
+3,'NULL'
+5,'NULL'
+---- TYPES
+INT,STRING
+====
+---- QUERY
+# WITH clause creates an inline view containing a nested struct; filter by a struct field
+# from the inline view.
+with sub as (
+    select id, outer_struct from functional_orc_def.complextypes_nested_structs)
+select sub.id, sub.outer_struct.str
+from sub
+where length(sub.outer_struct.str) < 4;
+---- RESULTS
+2,'str'
+4,''
+---- TYPES
+INT,STRING
+====
+---- QUERY
+# WITH clause creates an inline view containing a two structs; select only one of them and
+# filter by one of its fields.
+with sub as (
+    select id, outer_struct.inner_struct1 s1, outer_struct.inner_struct2 s2
+    from functional_orc_def.complextypes_nested_structs)
+select sub.id, s2
+from sub
+where length(s2.str) < 8;
+---- RESULTS
+2,'{"i":100,"str":"str3"}'
+4,'{"i":1,"str":"string"}'
+---- TYPES
+INT,STRING
+====
+---- QUERY
+# WITH clause creates an inline view containing a nested struct; filter by a struct field
+# from the inline view but do not select anything from it.
+with sub as (
+    select id, outer_struct from functional_orc_def.complextypes_nested_structs)
+select 1
+from sub
+where length(sub.outer_struct.str) < 4;
+---- RESULTS
+1
+1
+---- TYPES
+TINYINT
+====
+---- QUERY
 # WITH clause creates an inline view containing a nested struct. Also has a filter on
 # the inline view and ordering by a non-complex item from the view.
 with sub as (
@@ -132,6 +301,98 @@ select sub.id, sub.outer_struct from sub order by sub.id desc;
 INT,STRING
 ====
 ---- QUERY
+# Two-level inline view, querying a struct filed.
+with sub as (
+    select id, outer_struct from functional_orc_def.complextypes_nested_structs)
+select id, s from (select id, outer_struct.inner_struct1 as s from sub) v
+order by id;
+---- RESULTS
+1,'{"str":"somestr2","de":12345.12}'
+2,'NULL'
+3,'NULL'
+4,'{"str":"somestr2","de":12345.12}'
+5,'NULL'
+---- TYPES
+INT,STRING
+====
+---- QUERY
+# Two-level inline view, querying the top level struct.
+with sub as (
+    select id, outer_struct from functional_orc_def.complextypes_nested_structs)
+select id, s from (select id, outer_struct as s from sub) v
+order by id;
+---- RESULTS
+1,'{"str":"somestr1","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":333222111,"str":"somestr3"},"inner_struct3":{"s":{"i":112288,"s":null}}}'
+2,'{"str":"str","inner_struct1":null,"inner_struct2":{"i":100,"str":"str3"},"inner_struct3":{"s":{"i":321,"s":"dfgs"}}}'
+3,'NULL'
+4,'{"str":"","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":1,"str":"string"},"inner_struct3":{"s":null}}'
+5,'{"str":null,"inner_struct1":null,"inner_struct2":null,"inner_struct3":null}'
+---- TYPES
+INT,STRING
+====
+---- QUERY
+# Select a struct that contains multiple structs using a filter on multiple struct fields
+# when the struct fields in the predicate are itemTupleDescriptors within the struct(s),
+# not in the main tuple.
+select id, outer_struct
+from functional_orc_def.complextypes_nested_structs
+where outer_struct.inner_struct2.i > length(outer_struct.str)
+---- RESULTS
+1,'{"str":"somestr1","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":333222111,"str":"somestr3"},"inner_struct3":{"s":{"i":112288,"s":null}}}'
+2,'{"str":"str","inner_struct1":null,"inner_struct2":{"i":100,"str":"str3"},"inner_struct3":{"s":{"i":321,"s":"dfgs"}}}'
+4,'{"str":"","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":1,"str":"string"},"inner_struct3":{"s":null}}'
+---- TYPES
+INT,STRING
+====
+---- QUERY
+# Select a nested struct member first, and then the enclosing struct so that even the
+# enclosing struct is embedded in another one, but the top level struct is not present in
+# the query.
+select id, outer_struct.inner_struct2.i, outer_struct.inner_struct2
+from functional_orc_def.complextypes_nested_structs;
+---- RESULTS
+1,333222111,'{"i":333222111,"str":"somestr3"}'
+2,100,'{"i":100,"str":"str3"}'
+3,NULL,'NULL'
+4,1,'{"i":1,"str":"string"}'
+5,NULL,'NULL'
+---- TYPES
+INT,INT,STRING
+====
+---- QUERY
+# An inner join where struct fields are in the join condition and their parent struct is
+# in the select list.
+select a.outer_struct, b.small_struct
+from functional_orc_def.complextypes_nested_structs a
+    inner join functional_orc_def.complextypes_structs b
+    on b.small_struct.i = a.outer_struct.inner_struct2.i + 19091;
+---- RESULTS
+'{"str":"str","inner_struct1":null,"inner_struct2":{"i":100,"str":"str3"},"inner_struct3":{"s":{"i":321,"s":"dfgs"}}}','{"i":19191,"s":"small_struct_str"}'
+---- TYPES
+STRING,STRING
+====
+---- QUERY
+# An outer join where struct fields are in the join condition and their parent struct is
+# in the select list.
+select a.outer_struct, b.small_struct
+from functional_orc_def.complextypes_nested_structs a
+    full outer join functional_orc_def.complextypes_structs b
+    on b.small_struct.i = a.outer_struct.inner_struct2.i + 19091;
+---- RESULTS
+'{"str":"somestr1","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":333222111,"str":"somestr3"},"inner_struct3":{"s":{"i":112288,"s":null}}}','NULL'
+'{"str":"str","inner_struct1":null,"inner_struct2":{"i":100,"str":"str3"},"inner_struct3":{"s":{"i":321,"s":"dfgs"}}}','{"i":19191,"s":"small_struct_str"}'
+'NULL','NULL'
+'{"str":"","inner_struct1":{"str":"somestr2","de":12345.12},"inner_struct2":{"i":1,"str":"string"},"inner_struct3":{"s":null}}','NULL'
+'{"str":null,"inner_struct1":null,"inner_struct2":null,"inner_struct3":null}','NULL'
+'NULL','{"i":98765,"s":"abcde f"}'
+'NULL','{"i":98765,"s":null}'
+'NULL','{"i":null,"s":null}'
+'NULL','{"i":null,"s":"str"}'
+'NULL','NULL'
+---- TYPES
+STRING,STRING
+====
+---- QUERY
 # Checks that "SELECT nested_struct.* ..." omits the nested structs from the output.
 select id, outer_struct.* from functional_orc_def.complextypes_nested_structs;
 ---- RESULTS