You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by su...@apache.org on 2017/01/27 20:16:50 UTC

[1/3] incubator-trafodion git commit: [TRAFODION-2315] Heuristic decision for common subexpressions

Repository: incubator-trafodion
Updated Branches:
  refs/heads/master b6478e582 -> 222b6b76d


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/RelExpr.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/RelExpr.h b/core/sql/optimizer/RelExpr.h
index d2d6c20..1b9fad9 100644
--- a/core/sql/optimizer/RelExpr.h
+++ b/core/sql/optimizer/RelExpr.h
@@ -601,8 +601,9 @@ public:
        const ValueIdSet &predicatesToRemove,
        const ValueIdSet &commonPredicatesToAdd,
        const ValueIdSet &inputsToRemove,
-       CSEInfo *info,
-       NABoolean testRun);
+       ValueIdSet &valuesForVEGRewrite,
+       ValueIdSet &keyColumns,
+       CSEInfo *info);
 
   // A virtual method called on every node from prepareTreeForCSESharing(),
   // use this to do the actual work of removing predicates other than
@@ -629,8 +630,9 @@ public:
        const ValueIdSet &predicatesToRemove,
        const ValueIdSet &commonPredicatesToAdd,
        const ValueIdSet &inputsToRemove,
-       CSEInfo *info,
-       NABoolean testRun);
+       ValueIdSet &valuesForVEGRewrite,
+       ValueIdSet &keyColumns,
+       CSEInfo *info);
 
   // --------------------------------------------------------------------
   // Create a query execution plan.
@@ -968,6 +970,8 @@ public:
                                 CollHeap* outHeap = NULL);
   RelExpr * copyTree(CollHeap* heap = NULL);
   RelExpr * copyRelExprTree(CollHeap* outHeap = NULL);
+  const RelExpr * getOriginalExpr(NABoolean transitive = TRUE) const;
+  RelExpr * getOriginalExpr(NABoolean transitive = TRUE);
 
   // --------------------------------------------------------------------
   // Methods used internally by Cascades
@@ -1594,6 +1598,10 @@ private:
   // the partfunc pointed by my spp has a dop reduction.
   NABoolean dopReduced_;
 
+  // if we copy an expression with copyTopNode() then
+  // remember the original here, e.g. to find VEG regions
+  RelExpr *originalExpr_;
+
 public:
 
   // begin: accessors & mutators for relexpr tracking info

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/RelGrby.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/RelGrby.h b/core/sql/optimizer/RelGrby.h
index b5f1aee..8f4c21b 100644
--- a/core/sql/optimizer/RelGrby.h
+++ b/core/sql/optimizer/RelGrby.h
@@ -223,8 +223,9 @@ public:
        const ValueIdSet &predicatesToRemove,
        const ValueIdSet &commonPredicatesToAdd,
        const ValueIdSet &inputsToRemove,
-       CSEInfo *info,
-       NABoolean testRun);
+       ValueIdSet &valuesForVEGRewrite,
+       ValueIdSet &keyColumns,
+       CSEInfo *info);
 
   // flows compRefOpt constraints up the query tree.
   virtual void processCompRefOptConstraints(NormWA * normWAPtr) ;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/RelJoin.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/RelJoin.h b/core/sql/optimizer/RelJoin.h
index 2c66b31..a185ff7 100644
--- a/core/sql/optimizer/RelJoin.h
+++ b/core/sql/optimizer/RelJoin.h
@@ -792,8 +792,9 @@ public:
        const ValueIdSet &predicatesToRemove,
        const ValueIdSet &commonPredicatesToAdd,
        const ValueIdSet &inputsToRemove,
-       CSEInfo *info,
-       NABoolean testRun);
+       ValueIdSet &valuesForVEGRewrite,
+       ValueIdSet &keyColumns,
+       CSEInfo *info);
 
   // Detect whether rows coming from the ith child contain multi-column skew for
   // a set of join predicates. The output argument vidOfEquiJoinWithSkew is the

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/RelMisc.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/RelMisc.h b/core/sql/optimizer/RelMisc.h
index 7812d0b..d892e45 100644
--- a/core/sql/optimizer/RelMisc.h
+++ b/core/sql/optimizer/RelMisc.h
@@ -2557,6 +2557,9 @@ private:
   NAString comment_;
 };
 
+// forward reference
+class CountedCSEInfo;
+
 // Container for the common info about a common subexpression. This is
 // a helper class for class CommonSubExprRef below.
 
@@ -2576,10 +2579,20 @@ public:
   enum CSEAnalysisOutcome
     {
       UNKNOWN_ANALYSIS,    // analysis not yet done
+      ELIMINATED_IN_BINDER,// single-consumer CSE, eliminated in binder
       EXPAND,              // expand the common subexpression
       CREATE_TEMP,         // materialize CSE as temp, then read the temp
       TEMP,                // read the temp created by someone else
       ERROR                // error occurred, diags are set
+      // possible future analysis outcomes:
+      // - decide outcome in the optimizer
+      // - pipeline results from a single producer to multiple consumers
+      //
+      // Note: Right now, all consumers of a CSE have the same
+      //       outcome; that may change in the future,
+      //       e.g. we may have 5 consumers that can use sharing
+      //       very well and one that isn't suitable for sharing
+      //       (maybe because it reads different data).
     };
 
   enum CSETempTableType
@@ -2595,7 +2608,10 @@ public:
        cseId_(-1),
        childCSEs_(mem),
        consumers_(mem),
+       alternativeConsumers_(mem),
+       numLexicalRefs_(0),
        neededColumns_(mem),
+       cseTreeKeyColumns_(mem),
        idOfAnalyzingConsumer_(-1),
        analysisOutcome_(UNKNOWN_ANALYSIS),
        tempTableType_(UNKNOWN_TEMP_TABLE),
@@ -2607,12 +2623,16 @@ public:
 
   const NAString &getName() const                            { return name_; }
   Int32 getCSEId() const                                    { return cseId_; }
-  const LIST(CSEInfo *) &getChildCSEs() const           { return childCSEs_; }
+  const LIST(CountedCSEInfo) &getChildCSEs() const      { return childCSEs_; }
   const CollIndex getNumConsumers() const     { return consumers_.entries(); }
   CommonSubExprRef *getConsumer(CollIndex i) const   { return consumers_[i]; }
+  Int32 getNumLexicalRefs() const                  { return numLexicalRefs_; }
+  Int32 getTotalNumRefs(Int32 restrictToSingleConsumer = -1) const;
 
   Int32 getIdOfAnalyzingConsumer() const    { return idOfAnalyzingConsumer_; }
   CSEAnalysisOutcome getAnalysisOutcome(Int32 id) const;
+  NABoolean isShared(Int32 id) const
+                                   { return analysisOutcome_ == CREATE_TEMP; }
   NABoolean usesATempTable() const         { return insertIntoTemp_ != NULL; }
   CSETempTableType getTempTableType() const         { return tempTableType_; }
   const NABitVector &getNeededColumns() const       { return neededColumns_; }
@@ -2621,14 +2641,20 @@ public:
                                     { return vegRefsWithDifferingConstants_; }
   const ValueIdSet &getVEGRefsWithDifferingInputs() const
                                        { return vegRefsWithDifferingInputs_; }
+  const NABitVector &getCSETreeKeyColumns() const
+                                                { return cseTreeKeyColumns_; }
   const QualifiedName &getTempTableName() const     { return tempTableName_; }
   const NAString &getTempTableDDL() const            { return tempTableDDL_; }
   const NATable *getTempNATable() const               { return tempNATable_; }
   RelExpr *getInsertIntoTemp() const               { return insertIntoTemp_; }
 
   void setCSEId(Int32 id)                                     { cseId_ = id; }
-  void addChildCSE(CSEInfo *child);
+  Int32 addChildCSE(CSEInfo *childInfo, NABoolean addLexicalRef);
   void addCSERef(CommonSubExprRef *cse);
+  void eliminate()               { analysisOutcome_ == ELIMINATED_IN_BINDER; }
+  void registerAnAlternativeConsumer(CommonSubExprRef *c)
+                                          { alternativeConsumers_.insert(c); }
+  void replaceConsumerWithAnAlternative(CommonSubExprRef *c);
   void setIdOfAnalyzingConsumer(Int32 id)     { idOfAnalyzingConsumer_ = id; }
   void setAnalysisOutcome(CSEAnalysisOutcome outcome)
                                                { analysisOutcome_ = outcome; }
@@ -2641,6 +2667,7 @@ public:
                                       { vegRefsWithDifferingConstants_ += s; }
   void addVEGRefsWithDifferingInputs(const ValueIdSet &s)
                                          { vegRefsWithDifferingInputs_ += s; }
+  void addCSEKeyColumn(CollIndex c)               { cseTreeKeyColumns_ += c; }
   void setTempTableName(const QualifiedName &n)        { tempTableName_ = n; }
   void setTempTableDDL(const char *s)                   { tempTableDDL_ = s; }
   void setTempNATable(const NATable *nat)              { tempNATable_ = nat; }
@@ -2654,13 +2681,27 @@ private:
   Int32 cseId_;
 
   // list of other CSEs that are referenced by this one
-  LIST(CSEInfo *) childCSEs_;
+  LIST(CountedCSEInfo) childCSEs_;
 
   // list of nodes referring to the common
   // subexpression, their index numbers match
   // the index in this list
   LIST(CommonSubExprRef *) consumers_;
 
+  // We sometimes make copies of a tree, creating alternative
+  // consumers. Some notes on these alternative copies: The analyzing
+  // consumer (see CommonSubExprRef::analyzeAndPrepareForSharing())
+  // and its ancestors are always in the consumers_ list and do not
+  // change. As we replace a tree with its copy, we may replace other
+  // consumers with their respective copies. The code must be able to
+  // deal with this situation, so be careful when making decisions
+  // based on a particular consumer obtained from this list.
+  LIST(CommonSubExprRef *) alternativeConsumers_;
+
+  // number of lexical refs in the query for this expression
+  // (how many time does it appear in the query text)
+  Int32 numLexicalRefs_;
+
   // a common list of columns and predicate to use used for a
   // materialized CSE
 
@@ -2673,6 +2714,13 @@ private:
   ValueIdSet vegRefsWithDifferingConstants_;
   ValueIdSet vegRefsWithDifferingInputs_;
 
+  // information for heuristics
+
+  // "key" columns in the child tree, this could include Hive
+  // partition columns, "tag" constant columns in a union, and also
+  // trailing key columns that may not be significant
+  NABitVector cseTreeKeyColumns_;
+
   // information for the materialization of the CSE
   Int32 idOfAnalyzingConsumer_;
   CSEAnalysisOutcome analysisOutcome_;
@@ -2683,6 +2731,29 @@ private:
   RelExpr *insertIntoTemp_;
 };
 
+// A CSEInfo and a count (of how many references we have to it)
+class CountedCSEInfo
+{
+public:
+
+  CountedCSEInfo() : info_(NULL), lexicalCount_(-1)                         {}
+  CountedCSEInfo(CSEInfo *info, Int32 cnt = 0) :
+                                            info_(info), lexicalCount_(cnt) {}
+  CountedCSEInfo(const CountedCSEInfo &other) :
+                     info_(other.info_), lexicalCount_(other.lexicalCount_) {}
+  ~CountedCSEInfo()                                                         {}
+
+  CSEInfo *getInfo() const                                   { return info_; }
+  Int32 getLexicalCount() const                      { return lexicalCount_; }
+
+  void incrementLexicalCount()                            { lexicalCount_++; }
+
+private:
+
+  CSEInfo *info_;
+  Int32 lexicalCount_;
+};
+
 // -----------------------------------------------------------------------
 // The CommonSubExprRef class represents a potential common subexpression
 // (CSE) in the query tree. The common subexpression has a name, and
@@ -2705,6 +2776,11 @@ public:
        : RelExpr(REL_COMMON_SUBEXPR_REF,cse,NULL,oHeap),
          internalName_(internalName, oHeap),
          id_(-1),
+         isAnExpansionOf_(NULL),
+         isAnAlternativeOf_(NULL),
+         parentCSEId_(-1),
+         parentRefId_(-1),
+         lexicalRefNumFromParent_(-1),
          hbAccessOptionsFromCTE_(NULL)
   {}
 
@@ -2713,6 +2789,10 @@ public:
   // the name used in the CTE or a generated name
   const NAString &getName() const                    { return internalName_; }
   Int32 getId() const                                          { return id_; }
+  Int32 getParentCSEId() const                        { return parentCSEId_; }
+  Int32 getParentConsumerId() const                   { return parentRefId_; }
+  Int32 getLexicalRefNumFromParent() const {return lexicalRefNumFromParent_; }
+  NABoolean isAChildOfTheMainQuery() const;
 
   virtual Int32 getArity() const;
 
@@ -2721,6 +2801,7 @@ public:
   const ValueIdSet & getNonVEGColumns() const       { return nonVEGColumns_; }
 
   const ValueIdSet &getPushedPredicates() const  { return pushedPredicates_; }
+  const EstLogPropSharedPtr &getEstLogProps() const{ return cseEstLogProps_; }
 
   void setId(Int32 id)                     { CMPASSERT(id_ == -1); id_ = id; }
 
@@ -2729,10 +2810,46 @@ public:
                                             { hbAccessOptionsFromCTE_ = hbo; }
 
   // add this node to the global list of CommonSubExprRefs kept in CmpStatement
-  void addToCmpStatement();
+  void addToCmpStatement(NABoolean lexicalRef);
+
+  // establish a parent/child relationship between two CommonSubExprRefs
+  void addParentRef(CommonSubExprRef *parentRef);
 
   // is this the first reference to the common subexpression?
-  NABoolean isFirstReference();
+  NABoolean isFirstReference() const;
+
+  // CommonSubExprRefs come in different flavors:
+  //
+  // Lexical ref:     This is a node that got created in a parser
+  //                  rule, parsing a reference to a CTE (or, in the
+  //                  future, something equivalent, like a view reference).
+  // Expanded ref:    Since the parser expands CTE references by making a
+  //                  copy on each reference, if that copied tree contains
+  //                  child references, an expanded ref is created.
+  //                  Lexical and expanded refs shouldn't be treated
+  //                  differently, it is somewhat arbitrary which one
+  //                  is the lexical one and which ones are expanded.
+  // Original ref:    This is the original copy of a lexical or expanded
+  //                  ref.
+  // Alternative ref: Sometimes we copy a tree after binding, e.g. to
+  //                  have a fall-back during the SQO phase. Such copies
+  //                  are alternative refs, and only one of them should
+  //                  be used in the output of each compilation phase.
+  //                  Only one of the alternatives is part of the
+  //                  CSEInfo consumer list, the others are stored in
+  //                  the alternative consumer list.
+  //
+  // A given CommonSubExprRef is either a lexical or an expanded ref.
+  // The "alternative" flavor is orthogonal to that, both lexical
+  // and expanded refs can either be an original or an alternative.
+  NABoolean isALexicalRef() const         { return isAnExpansionOf_ == NULL; }
+  NABoolean isAnExpandedRef() const       { return isAnExpansionOf_ != NULL; }
+  NABoolean isAnOriginalRef() const     { return isAnAlternativeOf_ == NULL; }
+  NABoolean isAnAlternativeRef() const  { return isAnAlternativeOf_ != NULL; }
+  CommonSubExprRef *getLexicalRef()
+                        { return isAnExpansionOf_ ? isAnExpansionOf_ : this; }
+  CommonSubExprRef *getOriginalRef()
+                    { return isAnAlternativeOf_ ? isAnAlternativeOf_ : this; }
 
   // a virtual function for performing name binding within the query tree
   virtual RelExpr * bindNode(BindWA *bindWAPtr);
@@ -2749,6 +2866,14 @@ public:
        Lng32 childId = (-MAX_REL_ARITY));
   virtual void rewriteNode(NormWA & normWARef);
   virtual RelExpr * semanticQueryOptimizeNode(NormWA & normWARef);
+  virtual NABoolean prepareMeForCSESharing(
+       const ValueIdSet &outputsToAdd,
+       const ValueIdSet &predicatesToRemove,
+       const ValueIdSet &commonPredicatesToAdd,
+       const ValueIdSet &inputsToRemove,
+       ValueIdSet &valuesForVEGRewrite,
+       ValueIdSet &keyColumns,
+       CSEInfo *info);
 
   // add all the expressions that are local to this
   // node to an existing list of expressions (used by GUI tool)
@@ -2773,6 +2898,10 @@ public:
   // for use by the root node for inlining
   static Union *makeUnion(RelExpr *lc, RelExpr *rc, NABoolean blocked);
 
+  static void makeValueIdListFromBitVector(ValueIdList &tgt,
+                                           const ValueIdList &src,
+                                           const NABitVector &vec);
+
   // for debugging
   void display();
   static void displayAll(const char *optionalId = NULL);
@@ -2817,10 +2946,6 @@ private:
   RelExpr * createTempScan(CSEInfo &info, NormWA & normWARef);
   RelExpr * getTempScan() const                           { return tempScan_; }
 
-  static void makeValueIdListFromBitVector(ValueIdList &tgt,
-                                           const ValueIdList &src,
-                                           const NABitVector &vec);
-
   // data members
   // ------------
 
@@ -2833,6 +2958,40 @@ private:
   // common subexprssion. These references are numbered 0, 1, ...
   Int32 id_;
 
+  // indicate different flavors of CommonSubExprRef nodes and point
+  // back to the original node(s), if any
+  CommonSubExprRef *isAnExpansionOf_;
+  CommonSubExprRef *isAnAlternativeOf_;
+
+  // There are three ids that describe our predecessor in the
+  // RelExpr tree and in the lexical directed multigraph of the
+  // CSEs:
+  // Parent CSE id:
+  //     This is the the parent CSE or main query that directly
+  //     contains the reference. In the RelExpr tree, this is our
+  //     closest ancestor CommonSubExprRef node or the root node
+  //     of the tree. It is stored as the integer CSE id here,
+  //     we could also store the name. A special id is used for
+  //     the main query: CmpStatement::getCSEIdForMainQuery().
+  //
+  // Parent Ref id:
+  //     This is the consumer id of the parent CommonSubExprRef
+  //     (or 0 if the ref originates from the main query).
+  //
+  // Lexical ref num from parent:
+  //     This indicates which reference from the parent CSE we
+  //     are looking at. The main query or a CSE may refer to
+  //     the same child multiple times, and this is the number
+  //     indicating this (0...n-1). The id_ data member counts
+  //     the total number of references to a CSE; the lexical
+  //     ref num from parent counts only the lexical number
+  //     of references and only from a particular parent CSE
+  //     (or the main query).
+  //
+  Int32 parentCSEId_;
+  Int32 parentRefId_;
+  Int32 lexicalRefNumFromParent_;
+
   // The list of columns produced by the common subexpression.
   // We keep the full list here, even when the characteristic
   // outputs get reduced during the normalization phase. This
@@ -2841,8 +3000,9 @@ private:
   // of different consumers is by position in this list.
   ValueIdList columnList_;
 
-  // same columns without making VEGRefs. This is needed
-  // in preCodeGen.
+  // Same columns without making VEGRefs. This is needed
+  // in preCodeGen. It also includes other potential VEG
+  // members if this is the analyzing consumer.
   ValueIdSet nonVEGColumns_;
 
   // The common inputs (typically parameters). Pushing down
@@ -2873,6 +3033,12 @@ private:
   // create a temp table for the resulting CSE.
   HbaseAccessOptions *hbAccessOptionsFromCTE_;
 
+  // the estimated logical properties of this common subexpression,
+  // set in the analyzing consumer only, since they should be the same
+  // for all consumers
+  EstLogPropSharedPtr cseEstLogProps_;
+
+  // the temp scan to replace this node after the SQO phase
   RelExpr *tempScan_;
 
 }; // class CommonSubExprRef

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/RelScan.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/RelScan.h b/core/sql/optimizer/RelScan.h
index b2c7add..4f24dc0 100644
--- a/core/sql/optimizer/RelScan.h
+++ b/core/sql/optimizer/RelScan.h
@@ -499,8 +499,9 @@ public:
        const ValueIdSet &predicatesToRemove,
        const ValueIdSet &commonPredicatesToAdd,
        const ValueIdSet &inputsToRemove,
-       CSEInfo *info,
-       NABoolean testRun);
+       ValueIdSet &valuesForVEGRewrite,
+       ValueIdSet &keyColumns,
+       CSEInfo *info);
 
  // synthesizes compRefOpt constraints.
   virtual void processCompRefOptConstraints(NormWA * normWAPtr) ;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/RelSet.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/RelSet.h b/core/sql/optimizer/RelSet.h
index 00eff98..434b91a 100644
--- a/core/sql/optimizer/RelSet.h
+++ b/core/sql/optimizer/RelSet.h
@@ -261,8 +261,9 @@ public:
        const ValueIdSet &predicatesToRemove,
        const ValueIdSet &commonPredicatesToAdd,
        const ValueIdSet &inputsToRemove,
-       CSEInfo *info,
-       NABoolean testRun);
+       ValueIdSet &valuesForVEGRewrite,
+       ValueIdSet &keyColumns,
+       CSEInfo *info);
 
   // ---------------------------------------------------------------------
   // Function for testing the eligibility of this

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/VEGTable.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/VEGTable.cpp b/core/sql/optimizer/VEGTable.cpp
index 5e92bcb..7b07531 100644
--- a/core/sql/optimizer/VEGTable.cpp
+++ b/core/sql/optimizer/VEGTable.cpp
@@ -1693,16 +1693,29 @@ VEGRegion * VEGTable::getVEGRegion(const ExprNode * const ownerExpr,
 				   Lng32 subtreeId) const
 {
   VEGRegion* candidateRegion = NULL;
+  const ExprNode * ownerOrOrig = ownerExpr;
   
-  // Loop through the regions to find the one for this ExprNode
-  for (RegionId i = FIRST_VEG_REGION;
-       i < (RegionId)arrayEntry_.entries();
-       i++)
+  // Loop over the node and its original expressions
+  while (ownerOrOrig)
     {
-      candidateRegion = arrayEntry_[i];
-      if ( (candidateRegion->getOwnerExpr() == ownerExpr) AND 
-	   (candidateRegion->getSubtreeId() == subtreeId))
-	return candidateRegion;
+      // Loop through the regions to find the one for this ExprNode
+      for (RegionId i = FIRST_VEG_REGION;
+           i < (RegionId)arrayEntry_.entries();
+           i++)
+        {
+          candidateRegion = arrayEntry_[i];
+          if ( (candidateRegion->getOwnerExpr() == ownerOrOrig) AND 
+               (candidateRegion->getSubtreeId() == subtreeId))
+            return candidateRegion;
+        }
+
+      // VEGRegion not found, try with one of the original expressions
+      const RelExpr *re = ownerOrOrig->castToRelExpr();
+
+      if (re && re->getOriginalExpr(FALSE) != re)
+        ownerOrOrig = re->getOriginalExpr(FALSE);
+      else
+        ownerOrOrig = NULL;
     }
 
     // Didn't find any

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/ValueDesc.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/ValueDesc.cpp b/core/sql/optimizer/ValueDesc.cpp
index b0d77ba..c308854 100644
--- a/core/sql/optimizer/ValueDesc.cpp
+++ b/core/sql/optimizer/ValueDesc.cpp
@@ -3293,7 +3293,7 @@ void ValueIdSet::replaceOperandsOfInstantiateNull
 // For each ValueId in other check whether it is referenced anywhere
 // within an ItemExpr whose ValueId belongs to this set.
 // -----------------------------------------------------------------------
-void ValueIdSet::weedOutUnreferenced(ValueIdSet & other)
+void ValueIdSet::weedOutUnreferenced(ValueIdSet & other) const
 {
   ValueIdSet unrefSet;
   NABoolean notFound;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/ValueDesc.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/ValueDesc.h b/core/sql/optimizer/ValueDesc.h
index d423eb6..d69d988 100644
--- a/core/sql/optimizer/ValueDesc.h
+++ b/core/sql/optimizer/ValueDesc.h
@@ -1080,7 +1080,7 @@ public:
   // Delete all values in other that are not referenced by the value
   // expressions that belong to this set.
   // --------------------------------------------------------------------
-  void weedOutUnreferenced(ValueIdSet & other);
+  void weedOutUnreferenced(ValueIdSet & other) const;
 
   // --------------------------------------------------------------------
   // weedOutNonEquiPreds()

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/parser/sqlparser.y
----------------------------------------------------------------------
diff --git a/core/sql/parser/sqlparser.y b/core/sql/parser/sqlparser.y
index b6a6d88..3a2b3aa 100755
--- a/core/sql/parser/sqlparser.y
+++ b/core/sql/parser/sqlparser.y
@@ -6727,7 +6727,7 @@ table_name_and_hint : table_name optimizer_hint hbase_access_options
                               if ($3)
                                 cse->setHbaseAccessOptions($3);
 
-                              cse->addToCmpStatement();
+                              cse->addToCmpStatement(TRUE);
                               $$ = cse;
                             }
                           else
@@ -7061,7 +7061,7 @@ table_name_as_clause_and_hint : table_name as_clause optimizer_hint hbase_access
                                          if ($4)
                                            cse->setHbaseAccessOptions($4);
 
-                                         cse->addToCmpStatement();
+                                         cse->addToCmpStatement(TRUE);
                                          $$ = cse;
                                        }
                                      else

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/regress/compGeneral/EXPECTED045
----------------------------------------------------------------------
diff --git a/core/sql/regress/compGeneral/EXPECTED045 b/core/sql/regress/compGeneral/EXPECTED045
index 6489b5d..84692d9 100644
--- a/core/sql/regress/compGeneral/EXPECTED045
+++ b/core/sql/regress/compGeneral/EXPECTED045
@@ -466,6 +466,29 @@
 
 --- SQL operation complete.
 >>
+>>insert into date_dim values (
++>  1, '1', date '2000-01-01', 1, 1, 1, 2000, 1, 1, 1, 1, 2000, 1, 1, 'aday', 'aq', ' ', ' ', ' ', 1, 31, 0, 0, ' ', ' ', ' ', ' ', ' '
++>);
+
+--- 1 row(s) inserted.
+>>
+>>insert into store_sales values (
++>  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0
++>);
+
+--- 1 row(s) inserted.
+>>
+>>update statistics for table date_dim on every column;
+
+--- SQL operation complete.
+>>update statistics for table date_dim on (d_qoy, d_year);
+
+*** WARNING[9202] UPDATE STATISTICS has located previously generated histograms that are not being regenerated. This may affect the plans that will be generated. Missing columns lists are (D_CURRENT_YEAR),(D_CURRENT_QUARTER),(D_CURRENT_MONTH),(D_CURRENT_WEEK),(D_CURRENT_DAY),(D_SAME_DAY_LQ),(D_SAME_DAY_LY),(D_LAST_DOM),(D_FIRST_DOM),(D_FOLLOWING_HOLIDAY),(D_WEEKEND),(D_HOLIDAY),(D_QUARTER_NAME),(D_DAY_NAME),(D_FY_WEEK_SEQ),(D_FY_QUARTER_SEQ),(D_FY_YEAR),(D_DOM),(D_MOY),(D_DOW),(D_QUARTER_SEQ),(D_WEEK_SEQ),(D_MONTH_SEQ),(D_DATE),(D_DATE_ID),(D_DATE_SK).
+
+--- SQL operation completed with warnings.
+>>update statistics for table store_sales on every column;
+
+--- SQL operation complete.
 >>
 >>--------------------------------------------------------------------
 >>obey TEST045(queries);
@@ -712,7 +735,7 @@ SCAN TEMP                                          2
 +> where d_week_seq1=d_week_seq2-53
 +> order by d_week_seq1;
 
-*** WARNING[5001] Common subexpression WSWSCS cannot be shared among multiple consumers. Reason: Operator map_value_ids not supported.
+*** WARNING[5001] Common subexpression WSCS will not be shared among multiple consumers. Reason: expression is only evaluated once because parent is materialized.
 
 --- SQL command prepared.
 >>-- use temp for wscs only, not wswscs, due to MapValueIds
@@ -1146,7 +1169,7 @@ SCAN TEMP                                          4
 
 *** WARNING[2997]  (Subquery was not unnested. Reason: No Correlation found)
 
-*** WARNING[2997] RESULTS (Operator map_value_ids not supported)
+*** WARNING[2997]  (Subquery was not unnested. Reason: No Correlation found)
 
 --- SQL command prepared.
 >>execute show_cses;
@@ -1154,9 +1177,9 @@ SCAN TEMP                                          4
 OPERATOR                        HOW_MANY            
 ------------------------------  --------------------
 
-BLOCKED_UNION                                      2
-HIVE_INSERT                                        1
-SCAN TEMP                                         15
+BLOCKED_UNION                                      5
+HIVE_INSERT                                        3
+SCAN TEMP                                         11
 
 --- 3 row(s) selected.
 >>execute s;
@@ -1229,9 +1252,7 @@ CHANNEL  I_BRAND_ID   I_CLASS_ID   I_CATEGORY_ID  SUM_SALES             NUMBER_S
 
 *** WARNING[2997]  (Subquery was not unnested. Reason: No Correlation found)
 
-*** WARNING[5001] Common subexpression FREQUENT_SS_ITEMS cannot be shared among multiple consumers. Reason: Differing inputs in CTE references, try CQD QUERY_CACHE '0'.
-
-*** WARNING[5001] Common subexpression BEST_SS_CUSTOMER cannot be shared among multiple consumers. Reason: Differing inputs in CTE references, try CQD QUERY_CACHE '0'.
+*** WARNING[5001] Common subexpression MAX_STORE_SALES will not be shared among multiple consumers. Reason: expression is only evaluated once because parent is materialized.
 
 --- SQL command prepared.
 >>execute show_cses;
@@ -1239,9 +1260,9 @@ CHANNEL  I_BRAND_ID   I_CLASS_ID   I_CATEGORY_ID  SUM_SALES             NUMBER_S
 OPERATOR                        HOW_MANY            
 ------------------------------  --------------------
 
-BLOCKED_UNION                                      2
-HIVE_INSERT                                        1
-SCAN TEMP                                          2
+BLOCKED_UNION                                      3
+HIVE_INSERT                                        2
+SCAN TEMP                                          4
 
 --- 3 row(s) selected.
 >>execute s;
@@ -1320,9 +1341,7 @@ SCAN TEMP                                          2
 
 *** WARNING[2997]  (Subquery was not unnested. Reason: No Correlation found)
 
-*** WARNING[5001] Common subexpression FREQUENT_SS_ITEMS cannot be shared among multiple consumers. Reason: Differing inputs in CTE references, try CQD QUERY_CACHE '0'.
-
-*** WARNING[5001] Common subexpression BEST_SS_CUSTOMER cannot be shared among multiple consumers. Reason: Differing inputs in CTE references, try CQD QUERY_CACHE '0'.
+*** WARNING[5001] Common subexpression MAX_STORE_SALES will not be shared among multiple consumers. Reason: expression is only evaluated once because parent is materialized.
 
 --- SQL command prepared.
 >>execute show_cses;
@@ -1330,9 +1349,9 @@ SCAN TEMP                                          2
 OPERATOR                        HOW_MANY            
 ------------------------------  --------------------
 
-BLOCKED_UNION                                      2
-HIVE_INSERT                                        1
-SCAN TEMP                                          2
+BLOCKED_UNION                                      3
+HIVE_INSERT                                        2
+SCAN TEMP                                          4
 
 --- 3 row(s) selected.
 >>execute s;
@@ -1583,9 +1602,13 @@ SCAN TEMP                                          2
 +>       > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end
 +> order by store_q1_q2_increase;
 
-*** WARNING[5001] Common subexpression SS cannot be shared among multiple consumers. Reason: Encountered VEGs with different constants in different consumers.
+*** WARNING[5001] Common subexpression SS will not be shared among multiple consumers. Reason: Encountered VEGs with different constants in different consumers.
+
+*** WARNING[5001] Common subexpression SS will not be shared among multiple consumers. Reason: Differing inputs in CTE references, try CQD QUERY_CACHE '0'.
+
+*** WARNING[5001] Common subexpression WS will not be shared among multiple consumers. Reason: Encountered VEGs with different constants in different consumers.
 
-*** WARNING[5001] Common subexpression WS cannot be shared among multiple consumers. Reason: Encountered VEGs with different constants in different consumers.
+*** WARNING[5001] Common subexpression WS will not be shared among multiple consumers. Reason: Differing inputs in CTE references, try CQD QUERY_CACHE '0'.
 
 --- SQL command prepared.
 >>-- Different constants used in different references of WITH clause - not yet supported

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/regress/compGeneral/TEST045
----------------------------------------------------------------------
diff --git a/core/sql/regress/compGeneral/TEST045 b/core/sql/regress/compGeneral/TEST045
index 36c8bf4..6a6d465 100644
--- a/core/sql/regress/compGeneral/TEST045
+++ b/core/sql/regress/compGeneral/TEST045
@@ -486,6 +486,17 @@ create table item
 --  )
 ;
 
+insert into date_dim values (
+  1, '1', date '2000-01-01', 1, 1, 1, 2000, 1, 1, 1, 1, 2000, 1, 1, 'aday', 'aq', ' ', ' ', ' ', 1, 31, 0, 0, ' ', ' ', ' ', ' ', ' '
+);
+
+insert into store_sales values (
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0
+);
+
+update statistics for table date_dim on every column;
+update statistics for table date_dim on (d_qoy, d_year);
+update statistics for table store_sales on every column;
 
 --------------------------------------------------------------------
 ?section queries

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/sqlcomp/DefaultConstants.h
----------------------------------------------------------------------
diff --git a/core/sql/sqlcomp/DefaultConstants.h b/core/sql/sqlcomp/DefaultConstants.h
index 3dd96e6..af60786 100644
--- a/core/sql/sqlcomp/DefaultConstants.h
+++ b/core/sql/sqlcomp/DefaultConstants.h
@@ -3880,6 +3880,11 @@ enum DefaultConstants
 
   GROUP_BY_PUSH_TO_BOTH_SIDES_OF_JOIN,
 
+  CSE_TEMP_TABLE_MAX_SIZE,
+  CSE_TEMP_TABLE_MAX_MAX_SIZE,
+  CSE_COMMON_KEY_PRED_CONTROL,
+  CSE_PCT_KEY_COL_PRED_CONTROL,
+
   // This enum constant must be the LAST one in the list; it's a count,
   // not an Attribute (it's not IN DefaultDefaults; it's the SIZE of it)!
   __NUM_DEFAULT_ATTRIBUTES

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/sqlcomp/nadefaults.cpp
----------------------------------------------------------------------
diff --git a/core/sql/sqlcomp/nadefaults.cpp b/core/sql/sqlcomp/nadefaults.cpp
index b6fa742..dab9e68 100644
--- a/core/sql/sqlcomp/nadefaults.cpp
+++ b/core/sql/sqlcomp/nadefaults.cpp
@@ -1152,16 +1152,23 @@ SDDkwd__(CAT_ENABLE_QUERY_INVALIDATION, "ON"),
  DDkwd__(CSE_CACHE_TEMP_QUERIES,               "OFF"),
  // "cleanup obsolete volatile tables" command cleans up Hive temp tables
  DDkwd__(CSE_CLEANUP_HIVE_TABLES,              "OFF"),
+ // don't temp if all consumers have preds on n key columns
+ DDui___(CSE_COMMON_KEY_PRED_CONTROL,          "1"),
  // emit warnings that help diagnose why CSEs are not shared
  DDkwd__(CSE_DEBUG_WARNINGS,                   "OFF"),
  // create a CommonSubExpr node for CTEs defined in WITH clauses (OFF/ON)
  DDkwd__(CSE_FOR_WITH,                         "OFF"),
  // use Hive tables as temp tables
  DDkwd__(CSE_HIVE_TEMP_TABLE,                  "ON"),
+ // don't temp if avg consumer has preds on more than n percent of key cols
+ DDflt0_(CSE_PCT_KEY_COL_PRED_CONTROL,         "49.9"),
  // print debugging info on stdout
  DDkwd__(CSE_PRINT_DEBUG_INFO,                 "OFF"),
+ // limit temp table size (based on max. card and regular card)
+ DDflt0_(CSE_TEMP_TABLE_MAX_MAX_SIZE,          "1E12"),
+ DDflt0_(CSE_TEMP_TABLE_MAX_SIZE,              "1E9"),
  // implement CommonSubExpr as a temp table (OFF/SYSTEM/ON)
- DDkwd__(CSE_USE_TEMP,                         "ON"),
+ DDkwd__(CSE_USE_TEMP,                         "SYSTEM"),
 
 SDDui___(CYCLIC_ESP_PLACEMENT,                  "1"),
 


[2/3] incubator-trafodion git commit: [TRAFODION-2315] Heuristic decision for common subexpressions

Posted by su...@apache.org.
[TRAFODION-2315] Heuristic decision for common subexpressions

This includes various fixes in addition to the heuristic decision,
mainly an improvement on how to navigate between parent and child
common subexpressions.


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/2d415568
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/2d415568
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/2d415568

Branch: refs/heads/master
Commit: 2d41556888367183b20dfc75c815c309d59a428c
Parents: c572f63
Author: Hans Zeller <hz...@apache.org>
Authored: Wed Jan 25 17:37:42 2017 +0000
Committer: Hans Zeller <hz...@apache.org>
Committed: Wed Jan 25 17:37:42 2017 +0000

----------------------------------------------------------------------
 core/sql/arkcmp/CmpStatement.cpp         |  46 ++-
 core/sql/arkcmp/CmpStatement.h           |   7 +-
 core/sql/bin/SqlciErrors.txt             |   2 +-
 core/sql/comexe/ComTdbHdfsScan.h         |   2 +-
 core/sql/executor/ExHdfsScan.cpp         | 110 +++---
 core/sql/executor/ExHdfsScan.h           |  11 +-
 core/sql/exp/ExpLOBinterface.h           |   3 +
 core/sql/generator/GenPreCode.cpp        |   5 +
 core/sql/optimizer/BindRelExpr.cpp       |  18 +-
 core/sql/optimizer/ColStatDesc.cpp       |  96 +++++
 core/sql/optimizer/ColStatDesc.h         |  25 +-
 core/sql/optimizer/NormRelExpr.cpp       | 518 ++++++++++++++++++--------
 core/sql/optimizer/OptLogRelExpr.cpp     |  77 +++-
 core/sql/optimizer/RelExeUtil.cpp        |  42 +++
 core/sql/optimizer/RelExeUtil.h          |   3 +
 core/sql/optimizer/RelExpr.cpp           | 421 ++++++++++++++++++---
 core/sql/optimizer/RelExpr.h             |  16 +-
 core/sql/optimizer/RelGrby.h             |   5 +-
 core/sql/optimizer/RelJoin.h             |   5 +-
 core/sql/optimizer/RelMisc.h             | 188 +++++++++-
 core/sql/optimizer/RelScan.h             |   5 +-
 core/sql/optimizer/RelSet.h              |   5 +-
 core/sql/optimizer/VEGTable.cpp          |  29 +-
 core/sql/optimizer/ValueDesc.cpp         |   2 +-
 core/sql/optimizer/ValueDesc.h           |   2 +-
 core/sql/parser/sqlparser.y              |   4 +-
 core/sql/regress/compGeneral/EXPECTED045 |  61 ++-
 core/sql/regress/compGeneral/TEST045     |  11 +
 core/sql/sqlcomp/DefaultConstants.h      |   5 +
 core/sql/sqlcomp/nadefaults.cpp          |   9 +-
 30 files changed, 1385 insertions(+), 348 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/arkcmp/CmpStatement.cpp
----------------------------------------------------------------------
diff --git a/core/sql/arkcmp/CmpStatement.cpp b/core/sql/arkcmp/CmpStatement.cpp
index ede0cb1..4c3cd24 100644
--- a/core/sql/arkcmp/CmpStatement.cpp
+++ b/core/sql/arkcmp/CmpStatement.cpp
@@ -1705,17 +1705,7 @@ void CmpStatement::setTMUDFRefusedRequirements(const char *details)
   detailsOnRefusedRequirements_->insert(new(heap_) NAString(details, heap_));
 }
 
-void CmpStatement::addCSEInfo(CSEInfo *info)
-{
-  if (cses_ == NULL)
-    cses_ = new(CmpCommon::statementHeap())
-      LIST(CSEInfo *)(CmpCommon::statementHeap());
-
-  info->setCSEId(cses_->entries());
-  cses_->insert(info);
-}
-
-CSEInfo * CmpStatement::getCSEInfo(const char *cseName)
+CSEInfo * CmpStatement::getCSEInfo(const char *cseName) const
 {
   if (cses_)
     for (CollIndex i=0; i<cses_->entries(); i++)
@@ -1727,3 +1717,37 @@ CSEInfo * CmpStatement::getCSEInfo(const char *cseName)
   // no match found
   return NULL;
 }
+
+CSEInfo * CmpStatement::getCSEInfoForMainQuery() const
+{
+  // the first entry is reserved for the main query
+  return getCSEInfoById(getCSEIdForMainQuery());
+}
+
+CSEInfo *CmpStatement::getCSEInfoById(Int32 cseId) const
+{
+  DCMPASSERT(cses_);
+  CSEInfo *result = (*cses_)[cseId];
+
+  CMPASSERT(result->getCSEId() == cseId);
+
+  return result;
+}
+
+void CmpStatement::addCSEInfo(CSEInfo *info)
+{
+  if (cses_ == NULL)
+    {
+      cses_ = new(CmpCommon::statementHeap())
+        LIST(CSEInfo *)(CmpCommon::statementHeap());
+
+      // add an entry for the main query, so we can
+      // record the CSE references of the main query
+      DCMPASSERT(cses_->entries() == getCSEIdForMainQuery());
+      addCSEInfo(new(CmpCommon::statementHeap())
+                 CSEInfo("", CmpCommon::statementHeap()));
+    }
+
+  info->setCSEId(cses_->entries());
+  cses_->insert(info);
+}

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/arkcmp/CmpStatement.h
----------------------------------------------------------------------
diff --git a/core/sql/arkcmp/CmpStatement.h b/core/sql/arkcmp/CmpStatement.h
index 0d5b399..8e9f6cf 100644
--- a/core/sql/arkcmp/CmpStatement.h
+++ b/core/sql/arkcmp/CmpStatement.h
@@ -227,8 +227,11 @@ public:
   short getDDLExprAndNode(char * sqlStr, Lng32 inputCS,
                           DDLExpr* &ddlExpr, ExprNode* &ddlNode);
 
-  CSEInfo *getCSEInfo(const char *cseName);
-  const LIST(CSEInfo *) *getCSEInfoList() { return cses_; }
+  CSEInfo *getCSEInfo(const char *cseName) const;
+  CSEInfo *getCSEInfoForMainQuery() const;
+  static Int32 getCSEIdForMainQuery() { return 0; }
+  CSEInfo *getCSEInfoById(Int32 cseId) const;
+  const LIST(CSEInfo *) *getCSEInfoList() const { return cses_; }
   void addCSEInfo(CSEInfo *info);
 
 protected:

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/bin/SqlciErrors.txt
----------------------------------------------------------------------
diff --git a/core/sql/bin/SqlciErrors.txt b/core/sql/bin/SqlciErrors.txt
index a7f9bf9..665649d 100644
--- a/core/sql/bin/SqlciErrors.txt
+++ b/core/sql/bin/SqlciErrors.txt
@@ -1447,7 +1447,7 @@ $1~String1 --------------------------------
 4492 ZZZZZ 99999 BEGINNER MINOR LOGONLY BULK LOAD option UPDATE STATISTICS cannot be used with UPSERT USING LOAD option.
 4493 ZZZZZ 99999 BEGINNER MINOR LOGONLY Stored Descriptor Status: $0~String0
 5000 ZZZZZ 99999 ADVANCED MAJOR DBADMIN Internal error in the query normalizer.
-5001 ZZZZZ 99999 ADVANDED MINOR LOGONLY Common subexpression $0~String0 cannot be shared among multiple consumers. Reason: $1~String1.
+5001 ZZZZZ 99999 ADVANDED MINOR LOGONLY Common subexpression $0~String0 will not be shared among multiple consumers. Reason: $1~String1.
 6000 ZZZZZ 99999 ADVANCED MAJOR DBADMIN Internal error in the query optimizer.
 6001 0A000 99999 BEGINNER MAJOR DBADMIN DISTINCT aggregates can be computed for only one column per table expression.
 6002 ZZZZZ 99999 BEGINNER MAJOR DBADMIN The metadata table HISTOGRAMS or HISTOGRAM_INTERVALS contains invalid values.  If you have manually modified the metadata table, then you should undo your changes using the CLEAR option in UPDATE STATISTICS.

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/comexe/ComTdbHdfsScan.h
----------------------------------------------------------------------
diff --git a/core/sql/comexe/ComTdbHdfsScan.h b/core/sql/comexe/ComTdbHdfsScan.h
index 29718b7..9628d06 100755
--- a/core/sql/comexe/ComTdbHdfsScan.h
+++ b/core/sql/comexe/ComTdbHdfsScan.h
@@ -242,7 +242,7 @@ public:
   void   setHiveScanMode(UInt32 v ) { hiveScanMode_ = v; }
   UInt32 getHiveScanMode() { return hiveScanMode_; }
 
-  Queue* getHdfsFileInfoList() {return hdfsFileInfoList_;}
+  Queue* getHdfsFileInfoList() const {return hdfsFileInfoList_;}
   Queue* getHdfsFileRangeBeginList() {return hdfsFileRangeBeginList_;}
   Queue* getHdfsFileRangeNumList() {return hdfsFileRangeNumList_;}
 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/executor/ExHdfsScan.cpp
----------------------------------------------------------------------
diff --git a/core/sql/executor/ExHdfsScan.cpp b/core/sql/executor/ExHdfsScan.cpp
index 0e4d4c9..49d87e9 100644
--- a/core/sql/executor/ExHdfsScan.cpp
+++ b/core/sql/executor/ExHdfsScan.cpp
@@ -117,9 +117,8 @@ ExHdfsScanTcb::ExHdfsScanTcb(
   , checkRangeDelimiter_(FALSE)
   , dataModCheckDone_(FALSE)
   , loggingErrorDiags_(NULL)
-  , runTimeRanges_(NULL)
-  , numRunTimeRanges_(0)
   , loggingFileName_(NULL)
+  , hdfsFileInfoListAsArray_(glob->getDefaultHeap(), hdfsScanTdb.getHdfsFileInfoList()->numEntries())
 {
   Space * space = (glob ? glob->getSpace() : 0);
   CollHeap * heap = (glob ? glob->getDefaultHeap() : 0);
@@ -205,6 +204,19 @@ ExHdfsScanTcb::ExHdfsScanTcb(
                                         jniDebugPort,
                                         jniDebugTimeout);
 
+  // Populate the hdfsInfo list into an array to gain o(1) lookup access
+  Queue* hdfsInfoList = hdfsScanTdb.getHdfsFileInfoList();
+  if ( hdfsInfoList && hdfsInfoList->numEntries() > 0 )
+  {
+     hdfsInfoList->position();
+     int i = 0;
+     HdfsFileInfo* fInfo = NULL;
+     while ((fInfo = (HdfsFileInfo*)hdfsInfoList->getNext()) != NULL)
+        {
+          hdfsFileInfoListAsArray_.insertAt(i, fInfo);
+          i++;
+        }
+  }
 }
     
 ExHdfsScanTcb::~ExHdfsScanTcb()
@@ -266,8 +278,7 @@ void ExHdfsScanTcb::freeResources()
     delete qparent_.down;
     qparent_.down = NULL;
   }
-  if (runTimeRanges_)
-    deallocateRuntimeRanges();
+  deallocateRuntimeRanges();
   if (lobGlob_) { 
      ExpLOBinterfaceCleanup(lobGlob_, getGlobals()->getDefaultHeap());
      lobGlob_ = NULL;
@@ -406,7 +417,7 @@ ExWorkProcRetcode ExHdfsScanTcb::work()
                 step_ = ASSIGN_RANGES_AT_RUNTIME;
                 break;
               }
-	    else if (hdfsScanTdb().getHdfsFileInfoList()->isEmpty())
+	    else if (getHdfsFileInfoListAsArray().isEmpty())
 	      {
                 step_ = CHECK_FOR_DATA_MOD_AND_DONE;
 		break;
@@ -1708,6 +1719,7 @@ void ExHdfsScanTcb::computeRangesAtRuntime()
   Int64 myShare = 0;
   Int64 runningSum = 0;
   Int64 myStartPositionInBytes = 0;
+  Int64 myEndPositionInBytes = 0;
   Int64 firstFileStartingOffset = 0;
   Int64 lastFileBytesToRead = -1;
   Int32 numParallelInstances = MAXOF(getGlobals()->getNumOfInstances(),1);
@@ -1718,8 +1730,7 @@ void ExHdfsScanTcb::computeRangesAtRuntime()
                                               hdfsScanTdb().hdfsRootDir_,
                                               &numFiles);
 
-  if (runTimeRanges_)
-    deallocateRuntimeRanges();
+  deallocateRuntimeRanges();
 
   // in a first round, count the total number of bytes
   for (int f=0; f<numFiles; f++)
@@ -1738,8 +1749,17 @@ void ExHdfsScanTcb::computeRangesAtRuntime()
 
   if (totalSize > 0)
     {
+      if (myInstNum_ < numParallelInstances-1)
+        // read "myShare" bytes
+        myEndPositionInBytes = myStartPositionInBytes + myShare;
+      else
+        // the last ESP reads whatever is remaining
+        myEndPositionInBytes = totalSize;
+
       // second round, find out the range of files I need to read
-      for (int g=0; g<numFiles; g++)
+      for (int g=0;
+           g < numFiles && runningSum < myEndPositionInBytes;
+           g++)
         {
           Int64 prevSum = runningSum;
 
@@ -1757,80 +1777,62 @@ void ExHdfsScanTcb::computeRangesAtRuntime()
 
               numRanges_++;
 
-              if (runningSum > (myStartPositionInBytes + myShare) &&
-                  myInstNum_ < numParallelInstances-1)
-                // the next file is beyond the range that I need to read
-                lastFileBytesToRead =
-                  myStartPositionInBytes + myShare - prevSum;
-                break;
-            }
-        }
+              if (runningSum > myEndPositionInBytes)
+                // I don't need to read all the way to the end of this file
+                lastFileBytesToRead = myEndPositionInBytes - prevSum;
 
-      // now that we now how many ranges we need, allocate them
-      numRunTimeRanges_ = numRanges_;
-      runTimeRanges_ = new(getHeap()) HdfsFileInfo[numRunTimeRanges_];
-    }
+            } // file is at or beyond my starting file
+        } // loop over files, determining ranges
+    } // total size > 0
   else
     beginRangeNum_ = 0;
 
   // third round, populate the ranges that this ESP needs to read
   for (int h=beginRangeNum_; h<beginRangeNum_+numRanges_; h++)
     {
-      HdfsFileInfo &e(runTimeRanges_[h-beginRangeNum_]);
+      HdfsFileInfo *e = new(getHeap()) HdfsFileInfo;
       const char *fileName = fileInfos[h].mName;
       Int32 fileNameLen = strlen(fileName) + 1;
 
-      e.entryNum_ = h;
-      e.flags_    = 0;
-      e.fileName_ = new(getHeap()) char[fileNameLen];
-      str_cpy_all(e.fileName_, fileName, fileNameLen);
+      e->entryNum_ = h;
+      e->flags_    = 0;
+      e->fileName_ = new(getHeap()) char[fileNameLen];
+      str_cpy_all(e->fileName_, fileName, fileNameLen);
       if (h == beginRangeNum_ &&
           firstFileStartingOffset > 0)
         {
-          e.startOffset_ = firstFileStartingOffset;
-          e.setFileIsSplitBegin(TRUE);
+          e->startOffset_ = firstFileStartingOffset;
+          e->setFileIsSplitBegin(TRUE);
         }
       else
-        e.startOffset_ = 0;
+        e->startOffset_ = 0;
 
       
       if (h == beginRangeNum_+numRanges_-1 && lastFileBytesToRead > 0)
         {
-          e.bytesToRead_ = lastFileBytesToRead;
-          e.setFileIsSplitEnd(TRUE);
+          e->bytesToRead_ = lastFileBytesToRead;
+          e->setFileIsSplitEnd(TRUE);
         }
       else
-        e.bytesToRead_ = (Int64) fileInfos[h].mSize;
-    }
-}
+        e->bytesToRead_ = (Int64) fileInfos[h].mSize;
 
-void ExHdfsScanTcb::deallocateRuntimeRanges()
-{
-  if (runTimeRanges_)
-    {
-      for (int i=0; i<numRunTimeRanges_; i++)
-        NADELETEBASIC(runTimeRanges_[i].fileName_.getPointer(), getHeap());
-      NADELETEBASIC(runTimeRanges_, getHeap());
-      runTimeRanges_ = NULL;
-      numRunTimeRanges_ = 0;
+      hdfsFileInfoListAsArray_.insertAt(h, e);
     }
 }
 
-HdfsFileInfo * ExHdfsScanTcb::getRange(Int32 r)
+void ExHdfsScanTcb::deallocateRuntimeRanges()
 {
-  ex_assert(r >= beginRangeNum_ &&
-            r < beginRangeNum_+numRanges_,
-            "HDFS scan range num out of range");
-
-  if (hdfsScanTdb().getAssignRangesAtRuntime())
+  if (hdfsScanTdb().getAssignRangesAtRuntime() &&
+      hdfsFileInfoListAsArray_.entries() > 0)
     {
-      ex_assert(numRunTimeRanges_ == numRanges_,
-                "numRunTimeRanges_ != numRanges_");
-      return &runTimeRanges_[r-beginRangeNum_];
+      for (int i=0; i<hdfsFileInfoListAsArray_.getUsedLength(); i++)
+        if (hdfsFileInfoListAsArray_.used(i))
+          {
+            NADELETEBASIC(hdfsFileInfoListAsArray_[i]->fileName_.getPointer(), getHeap());
+            NADELETEBASIC(hdfsFileInfoListAsArray_[i], getHeap());
+          }
+      hdfsFileInfoListAsArray_.clear();
     }
-  else
-    return (HdfsFileInfo*)
-      hdfsScanTdb().getHdfsFileInfoList()->get(r);
 }
 
 short ExHdfsScanTcb::moveRowToUpQueue(const char * row, Lng32 len, 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/executor/ExHdfsScan.h
----------------------------------------------------------------------
diff --git a/core/sql/executor/ExHdfsScan.h b/core/sql/executor/ExHdfsScan.h
index 25f3ac3..407fa48 100644
--- a/core/sql/executor/ExHdfsScan.h
+++ b/core/sql/executor/ExHdfsScan.h
@@ -212,7 +212,7 @@ protected:
 
   void computeRangesAtRuntime();
   void deallocateRuntimeRanges();
-  HdfsFileInfo *getRange(Int32 r);
+  HdfsFileInfo *getRange(Int32 r) { return getHdfsFileInfoListAsArray().at(r); }
 
   short moveRowToUpQueue(const char * row, Lng32 len, 
                          short * rc, NABoolean isVarchar);
@@ -223,6 +223,10 @@ protected:
   short setupError(Lng32 exeError, Lng32 retcode, 
                    const char * str, const char * str2, const char * str3);
 
+  // Get the array representation of the HdfsFileInfoList to aid in
+  // o(1) access of an entry given an index.
+  HdfsFileInfoArray& getHdfsFileInfoListAsArray() {return hdfsFileInfoListAsArray_;}
+
   /////////////////////////////////////////////////////
   // Private data.
   /////////////////////////////////////////////////////
@@ -275,8 +279,6 @@ protected:
   Lng32 myInstNum_;
   Lng32 beginRangeNum_;
   Lng32 numRanges_;
-  HdfsFileInfo *runTimeRanges_;
-  Int32 numRunTimeRanges_;
   Lng32 currRangeNum_;
   char *endOfRequestedRange_ ; // helps rows span ranges.
   char * hdfsFileName_;
@@ -299,6 +301,9 @@ protected:
 
   NABoolean dataModCheckDone_;
   ComDiagsArea * loggingErrorDiags_;
+
+  // this array is populated from the info list stored as Queue.
+  HdfsFileInfoArray hdfsFileInfoListAsArray_;
 };
 
 class ExOrcScanTcb  : public ExHdfsScanTcb

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/exp/ExpLOBinterface.h
----------------------------------------------------------------------
diff --git a/core/sql/exp/ExpLOBinterface.h b/core/sql/exp/ExpLOBinterface.h
index 33d63b4..1d5dbaf 100644
--- a/core/sql/exp/ExpLOBinterface.h
+++ b/core/sql/exp/ExpLOBinterface.h
@@ -66,6 +66,9 @@ class HdfsFileInfo
   Int64 bytesToRead_;
 };
 
+typedef HdfsFileInfo* HdfsFileInfoPtr;
+typedef NAArray<HdfsFileInfoPtr> HdfsFileInfoArray;
+
 #include "ExpLOBaccess.h"
 
 #define LOB_ACCESS_SUCCESS 0

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/generator/GenPreCode.cpp
----------------------------------------------------------------------
diff --git a/core/sql/generator/GenPreCode.cpp b/core/sql/generator/GenPreCode.cpp
index 6551132..f0f92e5 100644
--- a/core/sql/generator/GenPreCode.cpp
+++ b/core/sql/generator/GenPreCode.cpp
@@ -6195,6 +6195,11 @@ RelExpr * MapValueIds::preCodeGen(Generator * generator,
                   {
                     availableValues -= subtractions;
                     availableValues += additions;
+                    // do the same for valuesNeededForVEGRewrite_,
+                    // which will be used for rewriting the char.
+                    // outputs
+                    valuesNeededForVEGRewrite_ -= subtractions;
+                    valuesNeededForVEGRewrite_ += additions;
                   }
               }
 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/BindRelExpr.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/BindRelExpr.cpp b/core/sql/optimizer/BindRelExpr.cpp
index d51624d..20a5d00 100644
--- a/core/sql/optimizer/BindRelExpr.cpp
+++ b/core/sql/optimizer/BindRelExpr.cpp
@@ -17669,21 +17669,23 @@ RelExpr * CommonSubExprRef::bindNode(BindWA *bindWA)
 
   DCMPASSERT(info);
 
+  // eliminate any CommonSubExprRef nodes that are not truly common,
+  // i.e. those that are referenced only once
+  if (info->getNumConsumers() <= 1)
+    {
+      info->eliminate();
+      return child(0).getPtr()->bindNode(bindWA);
+    }
+
   bindWA->setInCSE(this);
 
-  if (parentCSE)
-    // establish the parent/child relationship, if not done already
-    CmpCommon::statement()->getCSEInfo(parentCSE->getName())->addChildCSE(info);
+  // establish the parent/child relationship
+  addParentRef(parentCSE);
 
   bindChildren(bindWA);
   if (bindWA->errStatus())
     return this;
 
-  // eliminate any CommonSubExprRef nodes that are not truly common,
-  // i.e. those that are referenced only once
-  if (CmpCommon::statement()->getCSEInfo(internalName_)->getNumConsumers() <= 1)
-    return child(0).getPtr();
-
   // we know that our child is a RenameTable (same name as this CSE,
   // whose child is a RelRoot, defining the CTE. Copy the bound select
   // list of the CTE.

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/ColStatDesc.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/ColStatDesc.cpp b/core/sql/optimizer/ColStatDesc.cpp
index 4295062..9e2ce36 100644
--- a/core/sql/optimizer/ColStatDesc.cpp
+++ b/core/sql/optimizer/ColStatDesc.cpp
@@ -144,6 +144,29 @@ ColStatDesc::copy (const ColStatDesc& other)
 }
 
 void
+ColStatDesc::mapUpAndCopy (const ColStatDesc& other, ValueIdMap &map)
+{
+  copy(other);
+  map.mapValueIdUp(column_, other.column_);
+  map.mapValueIdUp(VEGcolumn_, other.VEGcolumn_);
+
+  // if the map only maps the column or VEGcolumn but not both, then
+  // map both to the same value id
+  if (column_ == other.column_ && VEGcolumn_ != other.VEGcolumn_)
+    column_ = VEGcolumn_;
+  if (VEGcolumn_ == other.VEGcolumn_ && column_ != other.column_)
+    VEGcolumn_ = column_;
+
+  // rewrite applied predicates
+  if (!appliedPreds_.isEmpty() &&
+      (column_ != other.column_ || VEGcolumn_ != other.VEGcolumn_))
+    {
+      appliedPreds_.clear();
+      map.rewriteValueIdSetUp(appliedPreds_, other.appliedPreds_);
+    }
+}
+
+void
 ColStatDesc::deallocate()
 {
   colStats_ = NULL;
@@ -2061,6 +2084,41 @@ ColStatDescList::insertDeepCopyAt (const CollIndex entry,
 } // insertDeepCopyAt
 
 void
+ColStatDescList::makeMappedDeepCopy (const ColStatDescList & source,
+                                     ValueIdMap &map,
+                                     NABoolean includeUnmappedColumns)
+{
+  // similar to makeDeepCopy, but this method maps all the ValueIds in
+  // the source, using the provided map, in the "up" direction
+  for ( CollIndex i = 0; i < source.entries(); i++ )
+    {
+      ValueId topVid;
+      ValueId bottomVid(source[i]->getVEGColumn());
+
+      map.mapValueIdUp(topVid, bottomVid);
+
+      if (includeUnmappedColumns || topVid != bottomVid)
+        {
+          ColStatDescSharedPtr tmpColStatDescPtr(
+               new (HISTHEAP) ColStatDesc(), HISTHEAP);
+          tmpColStatDescPtr->mapUpAndCopy(*source[i], map);
+          ColStatsSharedPtr tmpColStatsPtr = tmpColStatDescPtr->getColStatsToModify();
+          tmpColStatsPtr->copyAndScaleHistogram(1.0);
+          tmpColStatsPtr->setShapeChanged(tmpColStatsPtr->isShapeChanged()); 
+
+          insert(tmpColStatDescPtr);
+        }
+    }
+
+  // map the multi-column UECs
+  MultiColumnUecList * mappedMultiColUECs = new (CmpCommon::statementHeap())
+      MultiColumnUecList();
+
+  mappedMultiColUECs->insertMappedList(source.getUecList(), map);
+  setUecList(mappedMultiColUECs);
+} // makeMappedDeepCopy
+
+void
 ColStatDescList::removeDeepCopyAt (const CollIndex entry)
 {
   removeAt( entry );
@@ -9050,6 +9108,44 @@ MultiColumnUecList::insertList (const MultiColumnUecList * other)
 }
 
 // -----------------------------------------------------------------------
+// MultiColumnUecList::insertMappedList
+//
+// inserts all entries from OTHER into THIS, after mapping the ValueIds
+// in the list, using MAP. Note that we do the mapping in the "up"
+// direction.
+// -----------------------------------------------------------------------
+void
+MultiColumnUecList::insertMappedList(const MultiColumnUecList *other,
+                                     const ValueIdMap &map)
+{
+  if ( other == NULL ) return;
+  if ( other->entries() == 0 ) return;
+
+  ValueIdSet * keyEntry = NULL;
+  CostScalar * uecEntry = NULL;
+
+  MultiColumnUecListIterator iter( *other );
+
+  iter.getNext( keyEntry, uecEntry );
+
+  while ( keyEntry != NULL && uecEntry != NULL )
+    {
+      ValueIdSet *mappedSet = new(HISTHEAP) ValueIdSet;
+
+      map.mapValueIdSetUp(*mappedSet, *keyEntry);
+      // Todo: CSE: This is unlikely to work, since the stats will be
+      // expressed in BaseColumns, while the map contains VEGRefs.
+      // Uncomment the assert below and run compGeneral/TEST045
+      // to see the problem.
+      // DCMPASSERT(*mappedSet != *keyEntry);
+      if ( NOT contains( mappedSet ) )
+        insertPair( *mappedSet, *uecEntry );
+
+      iter.getNext( keyEntry, uecEntry );
+    }
+}
+
+// -----------------------------------------------------------------------
 // MultiColumnUecList::insertPair
 //
 // inserts a <table-column-valueidset, groupUec> pair

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/ColStatDesc.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/ColStatDesc.h b/core/sql/optimizer/ColStatDesc.h
index 4215cd9..87d2330 100644
--- a/core/sql/optimizer/ColStatDesc.h
+++ b/core/sql/optimizer/ColStatDesc.h
@@ -205,14 +205,14 @@ public:
   // HIST_MISSING_STATS_WARNING_LEVEL - The CQD has 5 values
   // It is used to control the number of missing stats warnings
   // that should be generated. 
-  // 0 \ufffd Display no warnings.
-  // 1 \ufffd Display only missing single column stats warnings. These include 6008 and 6011 
-  // 2 \ufffd Display all single column missing stats warnings and 
-  //     multi-column missing stats warnings for Scans only. 
-  // 3 \ufffd Display all missing single column stats warnings and missing 
-  //     multi-column stats warnings for Scans and GroupBy operators only..
-  // 4 \ufffd Display all missing single column stats and missing multi-column 
-  //     stats warnings for all operators including Scans, Joins and groupBys.
+  // 0: Display no warnings.
+  // 1: Display only missing single column stats warnings. These include 6008 and 6011 
+  // 2: Display all single column missing stats warnings and 
+  //    multi-column missing stats warnings for Scans only. 
+  // 3: Display all missing single column stats warnings and missing 
+  //    multi-column stats warnings for Scans and GroupBy operators only..
+  // 4: Display all missing single column stats and missing multi-column 
+  //    stats warnings for all operators including Scans, Joins and groupBys.
   // THE CQD also does not have an impact on the auto update stats behavior. The stats will
   // still be automatically generated even if the warnings have been suppressed.
   // Default behavior is to generate all warnings
@@ -305,6 +305,9 @@ public:
   // THIS (the ones that aren't already there)
   void insertList (const MultiColumnUecList * other) ;
 
+  void insertMappedList(const MultiColumnUecList *other,
+                        const ValueIdMap &map); // map is used in "up" direction
+
   // this routine answers whether there's any bona fide multi-column
   // information contained in this list -- i.e., any
   // <valueidset,costscalar> pairs where the valueidset has more than one
@@ -496,6 +499,8 @@ public:
 
   CostScalar getInputCard()  { return inputCard_; }
 
+  void mapUpAndCopy (const ColStatDesc& other, ValueIdMap &map) ;
+
   // synchronize/map the RowCount and UEC change of one set of aggregate
   // statistics with the current set of aggregate statistics
   void synchronizeStats (const CostScalar & baseRowcount,
@@ -723,6 +728,10 @@ public:
                          const ColStatDescSharedPtr & source,
                          const CostScalar & scale = 1,
                          const NABoolean shapeChangedMask = TRUE) ;
+  void makeMappedDeepCopy(
+                         const ColStatDescList & source,
+                         ValueIdMap &map, // map source "up"
+                         NABoolean includeUnmappedColumns);
 
   void removeDeepCopyAt (const CollIndex entry) ;
 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/NormRelExpr.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/NormRelExpr.cpp b/core/sql/optimizer/NormRelExpr.cpp
index 90e91fa..998d7fa 100644
--- a/core/sql/optimizer/NormRelExpr.cpp
+++ b/core/sql/optimizer/NormRelExpr.cpp
@@ -4673,8 +4673,9 @@ NABoolean Join::prepareMeForCSESharing(
      const ValueIdSet &predicatesToRemove,
      const ValueIdSet &commonPredicatesToAdd,
      const ValueIdSet &inputsToRemove,
-     CSEInfo *info,
-     NABoolean testRun)
+     ValueIdSet &valuesForVEGRewrite,
+     ValueIdSet &keyColumns,
+     CSEInfo *info)
 {
   if (isTSJForWrite() ||
       isTSJForUndo() ||
@@ -4682,15 +4683,12 @@ NABoolean Join::prepareMeForCSESharing(
       getIsForTrafLoadPrep())
     return FALSE;
 
-  if (!testRun)
-    {
-      // The caller of this methods added "commonPredicatesToAdd" to
-      // predicates_ (the generic selection predicates stored in the
-      // RelExpr). That works for both inner and non-inner joins.  The
-      // only thing we have left to do is to recompute the equi-join
-      // predicates.
-      findEquiJoinPredicates();
-    }
+  // The caller of this methods added "commonPredicatesToAdd" to
+  // predicates_ (the generic selection predicates stored in the
+  // RelExpr). That works for both inner and non-inner joins.  The
+  // only thing we have left to do is to recompute the equi-join
+  // predicates.
+  findEquiJoinPredicates();
 
   return TRUE;
 }
@@ -5013,8 +5011,9 @@ NABoolean Union::prepareTreeForCSESharing(
      const ValueIdSet &predicatesToRemove,
      const ValueIdSet &commonPredicatesToAdd,
      const ValueIdSet &inputsToRemove,
-     CSEInfo *info,
-     NABoolean testRun)
+     ValueIdSet &valuesForVEGRewrite,
+     ValueIdSet &keyColumns,
+     CSEInfo *info)
 {
   NABoolean result = TRUE;
 
@@ -5024,8 +5023,7 @@ NABoolean Union::prepareTreeForCSESharing(
   if (getSelectionPred().entries() > 0)
     {
       info->getConsumer(0)->emitCSEDiagnostics(
-           "Selection predicates on union node not supported",
-           !testRun);
+           "Selection predicates on union node not supported");
       return FALSE;
     }
 
@@ -5038,6 +5036,9 @@ NABoolean Union::prepareTreeForCSESharing(
       ValueIdSet childPredsToAdd;
       ValueIdMap *map = (i==0 ? &getLeftMap() : &getRightMap());
       ValueIdSet availableValues(map->getTopValues());
+      ValueIdSet dummyValuesForVEGRewrite;
+      ValueIdSet mappedKeyColumns;
+      ValueIdSet childKeyColumns;
 
       // if there are outputs to add, we can only do that for
       // outputs that already exist in the ValueIdMap
@@ -5045,8 +5046,7 @@ NABoolean Union::prepareTreeForCSESharing(
       if (locOutputsToAdd.removeUnCoveredExprs(availableValues))
         {
           info->getConsumer(0)->emitCSEDiagnostics(
-               "Not able to add output values unknown to union operator",
-               !testRun);
+               "Not able to add output values unknown to union operator");
           result = FALSE;
         }
 
@@ -5059,14 +5059,36 @@ NABoolean Union::prepareTreeForCSESharing(
            childPredsToRemove,
            childPredsToAdd,
            inputsToRemove,
-           info,
-           testRun);
+           dummyValuesForVEGRewrite,
+           childKeyColumns,
+           info);
+
+      map->mapValueIdSetUp(mappedKeyColumns, childKeyColumns);
+      // include only those that actually got mapped
+      mappedKeyColumns -= childKeyColumns;
+      keyColumns += mappedKeyColumns;
     }
 
-  if (result && !testRun)
+  if (result)
     {
+      NABoolean dummy;
+      CollIndex nu = unionMap_->leftColMap_.getBottomValues().entries();
+
       getGroupAttr()->addCharacteristicOutputs(outputsToAdd);
       getGroupAttr()->removeCharacteristicInputs(inputsToRemove);
+
+      // add columns that are a constant in at least one of the
+      // UNION's children to the key columns. Such columns can be used
+      // to eliminate entire legs of the union and therefore act like
+      // key or partition key columns.
+      for (CollIndex u=0; u<nu; u++)
+        {
+          if (unionMap_->leftColMap_.getBottomValues()[u].getItemExpr()->
+                castToConstValue(dummy) ||
+              unionMap_->rightColMap_.getBottomValues()[u].getItemExpr()->
+                castToConstValue(dummy))
+            keyColumns += unionMap_->colMapTable_[u];
+        }
     }
 
   // there is no need to call prepareMeForCSESharing() here
@@ -5702,38 +5724,37 @@ NABoolean GroupByAgg::prepareMeForCSESharing(
      const ValueIdSet &predicatesToRemove,
      const ValueIdSet &commonPredicatesToAdd,
      const ValueIdSet &inputsToRemove,
-     CSEInfo *info,
-     NABoolean testRun)
+     ValueIdSet &valuesForVEGRewrite,
+     ValueIdSet &keyColumns,
+     CSEInfo *info)
 {
   // The caller of this method took care of most adjustments to
   // make. The main thing the groupby node needs to do is to add any
   // outputs that are required to its characteristic outputs.
 
-  if (!testRun)
-    {
-      ValueIdSet myAvailableValues(groupExpr_);
-      ValueIdSet referencedValues;
-      ValueIdSet myOutputsToAdd;
-      ValueIdSet unCoveredExpr;
+  ValueIdSet myAvailableValues(groupExpr_);
+  ValueIdSet referencedValues;
+  ValueIdSet myOutputsToAdd;
+  ValueIdSet unCoveredExpr;
 
-      myAvailableValues += aggregateExpr_;
+  myAvailableValues += aggregateExpr_;
+  valuesForVEGRewrite += aggregateExpr_;
 
-      // The caller may be asking for expressions on columns, maybe
-      // even an expression involving grouping columns and aggregates
-      // and multiple tables, therefore use the isCovered method to
-      // determine those subexpressions that we can produce here.
-      NABoolean allCovered =
-        outputsToAdd.isCovered(myAvailableValues,
-                               *(getGroupAttr()),
-                               referencedValues,
-                               myOutputsToAdd,
-                               unCoveredExpr);
+  // The caller may be asking for expressions on columns, maybe
+  // even an expression involving grouping columns and aggregates
+  // and multiple tables, therefore use the isCovered method to
+  // determine those subexpressions that we can produce here.
+  NABoolean allCovered =
+    outputsToAdd.isCovered(myAvailableValues,
+                           *(getGroupAttr()),
+                           referencedValues,
+                           myOutputsToAdd,
+                           unCoveredExpr);
 
-      if (allCovered)
-        myOutputsToAdd = outputsToAdd;
+  if (allCovered)
+    myOutputsToAdd = outputsToAdd;
 
-      getGroupAttr()->addCharacteristicOutputs(myOutputsToAdd);
-    }
+  getGroupAttr()->addCharacteristicOutputs(myOutputsToAdd);
 
   return TRUE;
 }
@@ -6101,32 +6122,33 @@ NABoolean Scan::prepareMeForCSESharing(
      const ValueIdSet &predicatesToRemove,
      const ValueIdSet &commonPredicatesToAdd,
      const ValueIdSet &inputsToRemove,
-     CSEInfo *info,
-     NABoolean testRun)
+     ValueIdSet &valuesForVEGRewrite,
+     ValueIdSet &keyColumns,
+     CSEInfo *info)
 {
   // The caller of this method took care of most adjustments to
   // make. The main thing the scan node needs to do is to add any
   // outputs that are required to its characteristic outputs.
 
-  if (!testRun)
-    {
-      ValueIdSet myColSet(getTableDesc()->getColumnVEGList());
-      ValueIdSet referencedCols;
-      ValueIdSet myOutputsToAdd;
-      ValueIdSet unCoveredExpr;
+  ValueIdSet myColSet(getTableDesc()->getColumnVEGList());
+  ValueIdSet referencedCols;
+  ValueIdSet myOutputsToAdd;
+  ValueIdSet unCoveredExpr;
 
-      // The caller may be asking for expressions on columns, maybe
-      // even an expression involving multiple tables, therefore use
-      // the isCovered method to determine those subexpressions that we
-      // can produce here.
-      outputsToAdd.isCovered(myColSet,
-                             *(getGroupAttr()),
-                             referencedCols,
-                             myOutputsToAdd,
-                             unCoveredExpr);
+  // The caller may be asking for expressions on columns, maybe
+  // even an expression involving multiple tables, therefore use
+  // the isCovered method to determine those subexpressions that we
+  // can produce here.
+  outputsToAdd.isCovered(myColSet,
+                         *(getGroupAttr()),
+                         referencedCols,
+                         myOutputsToAdd,
+                         unCoveredExpr);
 
-      getGroupAttr()->addCharacteristicOutputs(myOutputsToAdd);
-    }
+  getGroupAttr()->addCharacteristicOutputs(myOutputsToAdd);
+  valuesForVEGRewrite.insertList(getTableDesc()->getColumnList());
+
+  keyColumns.insertList(getTableDesc()->getClusteringIndex()->getIndexKey());
 
   return TRUE;
 }
@@ -7674,6 +7696,11 @@ RelExpr * RelRoot::semanticQueryOptimizeNode(NormWA & normWARef)
                                 );
   }
 
+  // for debugging
+  if (normWARef.getCommonSubExprRefCount() > 0 &&
+      CmpCommon::getDefault(CSE_PRINT_DEBUG_INFO) == DF_ON)
+    CommonSubExprRef::displayAll();
+
   return this;
 
 } // RelRoot::semanticQueryOptimizeNode()
@@ -7717,8 +7744,11 @@ RelExpr * RelRoot::inlineTempTablesForCSEs(NormWA & normWARef)
         if (cses->at(i)->getInsertIntoTemp() != NULL)
           toDoVec += i;
 
-      // loop over the to-do list, finding new entries for which we
-      // already processed all of their predecessors
+      // Loop over the to-do list, finding new entries for which we
+      // already processed all of their predecessors. In this context,
+      // the children are the predecessors, since we have to build the
+      // graph bottom-up. In other words, find a topological reverse
+      // order of the lexical graph of the CSEs.
       while (toDoVec.entries() > 0)
         {
           RelExpr *thisLevelOfInserts = NULL;
@@ -7726,15 +7756,18 @@ RelExpr * RelRoot::inlineTempTablesForCSEs(NormWA & normWARef)
           for (CollIndex c=0; toDoVec.nextUsed(c); c++)
             {
               CSEInfo *info = cses->at(c);
-              // predecessor CSEs that have to be computed before we
+              // predecessor (child) CSEs that have to be computed before we
               // can attempt to compute this one
-              const LIST(CSEInfo *) &predecessors(info->getChildCSEs());
+              const LIST(CountedCSEInfo) &predecessors(info->getChildCSEs());
               NABoolean isReady = TRUE;
 
               for (CollIndex p=0; p<predecessors.entries(); p++)
                 {
-                  if (!doneVec.contains(p) &&
-                      cses->at(p)->getInsertIntoTemp() != NULL)
+                  Int32 cseId = predecessors[p].getInfo()->getCSEId();
+
+                  CMPASSERT(cses->at(cseId)->getCSEId() == cseId);
+                  if (!doneVec.contains(cseId) &&
+                      cses->at(cseId)->getInsertIntoTemp() != NULL)
                     // a predecessor CSE for which we have to
                     // materialize a temp table has not yet
                     // been processed - can't do this one
@@ -8017,8 +8050,9 @@ NABoolean RelExpr::prepareTreeForCSESharing(
      const ValueIdSet &predicatesToRemove,
      const ValueIdSet &newPredicatesToAdd,
      const ValueIdSet &inputsToRemove,
-     CSEInfo *info,
-     NABoolean testRun)
+     ValueIdSet &valuesForVEGRewrite,
+     ValueIdSet &keyColumns,
+     CSEInfo *info)
 {
   NABoolean result = TRUE;
   CollIndex nc = getArity();
@@ -8045,19 +8079,17 @@ NABoolean RelExpr::prepareTreeForCSESharing(
            childPredsToRemove,
            childPredsToAdd,
            inputsToRemove,
-           info,
-           testRun);
+           valuesForVEGRewrite,
+           keyColumns,
+           info);
 
-      if (!testRun)
-        {
-          // if the child already had or has added any of the requested
-          // outputs, then add them to our own char. outputs
-          ValueIdSet childAddedOutputs(
-               child(i).getGroupAttr()->getCharacteristicOutputs());
+      // if the child already had or has added any of the requested
+      // outputs, then add them to our own char. outputs
+      ValueIdSet childAddedOutputs(
+           child(i).getGroupAttr()->getCharacteristicOutputs());
 
-          childAddedOutputs.intersectSet(outputsToAdd);
-          getGroupAttr()->addCharacteristicOutputs(childAddedOutputs);
-        }
+      childAddedOutputs.intersectSet(outputsToAdd);
+      getGroupAttr()->addCharacteristicOutputs(childAddedOutputs);
 
       // Todo: CSE: consider using recursivePushDownCoveredExpr
       // instead of pushing these new predicates in this method
@@ -8065,10 +8097,10 @@ NABoolean RelExpr::prepareTreeForCSESharing(
       newLocalPredicates -= childPredsToAdd;
     }
 
-  if (result && !testRun)
+  if (result)
     {
       // Remove the predicates from our selection predicates.
-      // Note that the virtual method above is supposed to remove
+      // Note that prepareMeForCSESharing() is supposed to remove
       // these predicates from all other places in the node.
       predicates_ -= predicatesToRemove;
 
@@ -8103,8 +8135,9 @@ NABoolean RelExpr::prepareTreeForCSESharing(
                                     predicatesToRemove,
                                     newLocalPredicates,
                                     inputsToRemove,
-                                    info,
-                                    testRun);
+                                    valuesForVEGRewrite,
+                                    keyColumns,
+                                    info);
 
   return result;
 }
@@ -8129,8 +8162,9 @@ NABoolean RelExpr::prepareMeForCSESharing(
      const ValueIdSet &predicatesToRemove,
      const ValueIdSet &newPredicatesToAdd,
      const ValueIdSet &inputsToRemove,
-     CSEInfo *info,
-     NABoolean testRun)
+     ValueIdSet &valuesForVEGRewrite,
+     ValueIdSet &keyColumns,
+     CSEInfo *info)
 {
   // A class derived from RelExpr must explicitly define
   // this method to support being part of a shared CSE
@@ -8140,7 +8174,7 @@ NABoolean RelExpr::prepareMeForCSESharing(
   snprintf(buf, sizeof(buf), "Operator %s not supported",
            getText().data());
 
-  info->getConsumer(0)->emitCSEDiagnostics(buf, !testRun);
+  info->getConsumer(0)->emitCSEDiagnostics(buf);
 
   return FALSE;
 }
@@ -8990,6 +9024,11 @@ void CommonSubExprRef::transformNode(NormWA & normWARef,
     return;
   markAsTransformed();
 
+  // set lexicalRefNumFromParent_ for expanded refs, now that
+  // we can be sure the lexical ref has been bound
+  if (isAnExpansionOf_)
+    lexicalRefNumFromParent_ = isAnExpansionOf_->lexicalRefNumFromParent_;
+
   // Allocate a new VEG region for the child, to prevent VEGies that
   // cross the potentially common part and the rest of the query tree.
   //normWARef.allocateAndSetVEGRegion(EXPORT_ONLY, this);
@@ -9012,10 +9051,6 @@ void CommonSubExprRef::pullUpPreds()
   // pulled-up predicates here
   // RelExpr::pullUpPreds();
   // pulledPredicates_ += selectionPred();
-
-  // this is also the time to record the original set of inputs
-  // for this node, before predicate pushdown can alter the inputs
-  commonInputs_ = getGroupAttr()->getCharacteristicInputs();
 }
 
 void CommonSubExprRef::pushdownCoveredExpr(
@@ -9033,6 +9068,11 @@ void CommonSubExprRef::pushdownCoveredExpr(
   // share a common query tree.
   ValueIdSet predsPushedThisTime(predicatesOnParent);
 
+  if (pushedPredicates_.isEmpty())
+    // this is also the time to record the original set of inputs
+    // for this node, before predicate pushdown can alter the inputs
+    commonInputs_ = getGroupAttr()->getCharacteristicInputs();
+
   RelExpr::pushdownCoveredExpr(outputExpr,
                                newExternalInputs,
                                predicatesOnParent,
@@ -9059,13 +9099,13 @@ RelExpr * CommonSubExprRef::semanticQueryOptimizeNode(NormWA & normWARef)
   RelExpr *result = this;
   NABoolean ok = TRUE;
   CSEInfo *info = CmpCommon::statement()->getCSEInfo(internalName_);
-  CSEInfo::CSEAnalysisOutcome action = CSEInfo::UNKNOWN_ANALYSIS;
 
-  RelExpr::semanticQueryOptimizeNode(normWARef);
+  // do the analysis top-down
+  analyzeAndPrepareForSharing(*info);
 
-  action = analyzeAndPrepareForSharing(*info);
+  RelExpr::semanticQueryOptimizeNode(normWARef);
 
-  switch (action)
+  switch (info->getAnalysisOutcome(id_))
     {
     case CSEInfo::EXPAND:
       // Not able to share the CSE, expand the CSE by eliminating
@@ -9108,11 +9148,6 @@ RelExpr * CommonSubExprRef::semanticQueryOptimizeNode(NormWA & normWARef)
       CMPASSERT(0);
     }
 
-  // for debugging
-  if (CmpCommon::getDefault(CSE_PRINT_DEBUG_INFO) == DF_ON &&
-      info->getIdOfAnalyzingConsumer() == id_)
-    displayAll(internalName_);
-
   if (result == NULL)
     emitCSEDiagnostics("Error in creating temp table or temp table insert",
                        TRUE);
@@ -9120,16 +9155,55 @@ RelExpr * CommonSubExprRef::semanticQueryOptimizeNode(NormWA & normWARef)
   return result;
 }
 
+NABoolean CommonSubExprRef::prepareMeForCSESharing(
+     const ValueIdSet &outputsToAdd,
+     const ValueIdSet &predicatesToRemove,
+     const ValueIdSet &commonPredicatesToAdd,
+     const ValueIdSet &inputsToRemove,
+     ValueIdSet &valuesForVEGRewrite,
+     ValueIdSet &keyColumns,
+     CSEInfo *info)
+{
+  // the caller of this method already took care of the adjustments to
+  // make, just make sure that all predicates could be pushed down to
+  // the child
+
+  if (!getSelectionPred().isEmpty())
+    {
+      // this should not happen
+      emitCSEDiagnostics("Unable to push common predicates into child tree");
+      return FALSE;
+    }
+  return TRUE;
+}
+
 CSEInfo::CSEAnalysisOutcome CommonSubExprRef::analyzeAndPrepareForSharing(CSEInfo &info)
 {
   // do a few simple shortcuts first
 
+  // Make sure this consumer is in the main list of consumers. Note
+  // that the analysis is done top-down and that currently the only
+  // two places where we make copies of the tree are in
+  // RelRoot::semanticQueryOptimizeNode() and in this method. The copy
+  // made in the root is only used when we bypass SQO completely.
+  // Although we may sometimes look at unused copies during the CSE
+  // analysis phase, this guarantees (for now) that the analyzing
+  // consumer always is and stays in the list of consumers. If we ever
+  // make additional copies of the tree we may need to reconsider this
+  // logic.
+  if (info.getConsumer(id_) != this)
+    {
+      info.replaceConsumerWithAnAlternative(this);
+      DCMPASSERT(info.getConsumer(id_) == this);
+    }
+
   // If another consumer has already done the analysis, return its result.
   // Note: Right now, all the consumers do the same, in the future, we could
   // expand some and share others.
   if (info.getIdOfAnalyzingConsumer() >= 0)
     return info.getAnalysisOutcome(id_);
 
+  // mark me as the analyzing consumer
   info.setIdOfAnalyzingConsumer(id_);
 
   if (CmpCommon::getDefault(CSE_USE_TEMP) == DF_OFF)
@@ -9145,6 +9219,7 @@ CSEInfo::CSEAnalysisOutcome CommonSubExprRef::analyzeAndPrepareForSharing(CSEInf
   ValueIdList tempTableColumns;
   const ValueIdSet &charOutputs(getGroupAttr()->getCharacteristicOutputs());
   CollIndex numConsumers = info.getNumConsumers();
+  RelExpr *copyOfChildTree = NULL;
 
   // A laundry list of changes to undo the effects of normalization,
   // specifically of pushing predicates down and of minimizing the
@@ -9160,6 +9235,10 @@ CSEInfo::CSEAnalysisOutcome CommonSubExprRef::analyzeAndPrepareForSharing(CSEInf
     new(CmpCommon::statementHeap()) ValueIdMap[numConsumers];
   ItemExpr *nonCommonPredicatesORed = NULL;
   int numORedPreds = 0;
+  NABoolean singleLexicalRefWithTempedAncestors =
+    (info.getNumLexicalRefs() == 1);
+  Int32 numPreliminaryRefs = 0;
+  ValueIdSet childTreeKeyColumns;
 
   // ------------------------------------------------------------------
   // CSE Analysis phase
@@ -9179,6 +9258,35 @@ CSEInfo::CSEAnalysisOutcome CommonSubExprRef::analyzeAndPrepareForSharing(CSEInf
       const ValueIdSet &cPreds(consumer->pushedPredicates_);
       ValueIdSet mappedPreds;
       ValueId dummy;
+      CSEInfo *infoToCheck = &info;
+      CommonSubExprRef *childToCheck = consumer;
+      NABoolean ancestorIsTemped = FALSE;
+
+      // look for a chain of only lexical ancestors of which one is
+      // materialized in a temp table
+      while (!ancestorIsTemped &&
+             infoToCheck->getNumLexicalRefs() == 1 &&
+             childToCheck &&
+             childToCheck->parentRefId_ >= 0)
+        {
+          // look at the ancestor and what it is planning to do
+          infoToCheck = CmpCommon::statement()->getCSEInfoById(
+               childToCheck->parentCSEId_);
+          CMPASSERT(infoToCheck);
+          CommonSubExprRef *parent =
+            infoToCheck->getConsumer(childToCheck->parentRefId_);
+          CSEInfo::CSEAnalysisOutcome parentOutcome =
+            infoToCheck->getAnalysisOutcome(parent->getId());
+
+          if (parentOutcome == CSEInfo::CREATE_TEMP ||
+              parentOutcome == CSEInfo::TEMP)
+            ancestorIsTemped = TRUE;
+
+          childToCheck = parent;
+        }
+
+      if (!ancestorIsTemped)
+        singleLexicalRefWithTempedAncestors = FALSE;
 
       requiredValues += cPreds;
       availableValues +=
@@ -9273,7 +9381,7 @@ CSEInfo::CSEAnalysisOutcome CommonSubExprRef::analyzeAndPrepareForSharing(CSEInf
             // own ValueIds
             myColsToConsumerMaps[c].rewriteValueIdSetUp(mappedPreds, cPreds);
 
-            commonPredicates.findCommonSubexpressions(mappedPreds, TRUE);
+            commonPredicates.findCommonSubexpressions(mappedPreds, FALSE);
 
           }
       // Save the mapped preds for later.
@@ -9282,6 +9390,17 @@ CSEInfo::CSEAnalysisOutcome CommonSubExprRef::analyzeAndPrepareForSharing(CSEInf
       nonCommonPredicatesArray[c] = mappedPreds;
     }
 
+  if (singleLexicalRefWithTempedAncestors)
+    {
+      // if all the parent refs are materialized and each one is a
+      // copy of a single lexical ref, then that means that we will
+      // evaluate this CSE only once, therefore no need to materialize
+      // it
+      emitCSEDiagnostics(
+           "expression is only evaluated once because parent is materialized");
+      canShare = FALSE;
+    }
+
   // translate the bit vector of required columns into a set of values
   // that are required (by other consumers) but are not produced by my
   // child tree
@@ -9293,6 +9412,15 @@ CSEInfo::CSEAnalysisOutcome CommonSubExprRef::analyzeAndPrepareForSharing(CSEInf
   predicatesToRemove -= commonPredicates;
   info.setCommonPredicates(commonPredicates);
 
+  if (canShare && info.getNeededColumns().entries() == 0)
+    {
+      // Temp table has no columns, looks like all we care about is
+      // the number of rows returned. This is not yet supported. We
+      // could make a table with a dummy column.
+      emitCSEDiagnostics("Temp table with no columns is not yet supported");
+      canShare = FALSE;
+    }
+
   // Make an ORed predicate of all those non-common predicates of the
   // consumers, to be applied on the common subexpression when creating
   // the temp table. Also determine non-common predicates to be applied
@@ -9305,7 +9433,11 @@ CSEInfo::CSEAnalysisOutcome CommonSubExprRef::analyzeAndPrepareForSharing(CSEInf
       // tables. What we can do, however, is to OR these "uncommon"
       // predicates and apply that OR predicate when building the
       // temp table.
-      nonCommonPredicatesArray[n] -= commonPredicates;
+
+      // repeat step from above, but this time remove the common
+      // preds from the array of non-common ones
+      commonPredicates.findCommonSubexpressions(nonCommonPredicatesArray[n],
+                                                TRUE);
 
       if (nonCommonPredicatesArray[n].entries() > 0)
         {
@@ -9348,38 +9480,6 @@ CSEInfo::CSEAnalysisOutcome CommonSubExprRef::analyzeAndPrepareForSharing(CSEInf
       info.addCommonPredicates(newPredicatesToAdd);
     }
 
-  outputsToAdd -= child(0).getGroupAttr()->getCharacteristicOutputs();
-  inputsToRemove -= commonInputs_;
-
-  // do a dry-run first, before we change the child tree,
-  // because if we can't prepare the tree for CSE sharing,
-  // we'll need it in its original form to be able to expand it
-  if (canShare)
-    canShare = child(0)->prepareTreeForCSESharing(
-         outputsToAdd,
-         predicatesToRemove,
-         newPredicatesToAdd,
-         inputsToRemove,
-         &info,
-         TRUE);
-
-  if (canShare &&
-      CmpCommon::getDefault(CSE_USE_TEMP) != DF_ON)
-    {
-      // Todo: CSE: make a heuristic decision
-      emitCSEDiagnostics("Heuristically decided not to materialize");
-      canShare = FALSE;
-    }
-
-  if (canShare && info.getNeededColumns().entries() == 0)
-    {
-      // Temp table has no columns, looks like all we care about is
-      // the number of rows returned. This is not yet supported. We
-      // could make a table with a dummy column.
-      emitCSEDiagnostics("Temp table with no columns is not supported");
-      canShare = FALSE;
-    }
-
 
   // ------------------------------------------------------------------
   // Preparation phase
@@ -9387,38 +9487,152 @@ CSEInfo::CSEAnalysisOutcome CommonSubExprRef::analyzeAndPrepareForSharing(CSEInf
 
   if (canShare)
     {
-      canShare = child(0)->prepareTreeForCSESharing(
+      // make a copy of the child tree, so we can revert back to the
+      // original tree if things don't work out
+      copyOfChildTree = child(0)->copyRelExprTree(CmpCommon::statementHeap());
+
+      outputsToAdd -= child(0).getGroupAttr()->getCharacteristicOutputs();
+      inputsToRemove -= commonInputs_;
+
+      canShare = copyOfChildTree->prepareTreeForCSESharing(
            outputsToAdd,
            predicatesToRemove,
            newPredicatesToAdd,
            inputsToRemove,
-           &info,
-           FALSE);
+           nonVEGColumns_,
+           childTreeKeyColumns,
+           &info);
 
-      // If this failed we are in trouble, we potentially changed the
-      // child tree, preventing us from expanding it. Therefore we
-      // have to error out. We should therefore find most problems
-      // in the dry run above.
       if (!canShare)
-        {
-          emitCSEDiagnostics("Failed to prepare child tree for materialization",
-                             TRUE);
-          result = CSEInfo::ERROR;
-        }
-      else if (!child(0).getGroupAttr()->getCharacteristicOutputs().contains(
+        emitCSEDiagnostics("Failed to prepare child tree for materialization");
+      else if (!copyOfChildTree->getGroupAttr()->getCharacteristicOutputs().contains(
                     outputsToAdd))
         {
           // we failed to produce the requested additional outputs
-          emitCSEDiagnostics("Failed to produce all the required output columns",
-                             TRUE);
+          emitCSEDiagnostics("Failed to produce all the required output columns");
           canShare = FALSE;
-          result = CSEInfo::ERROR;
         }
+      else
+        {
+          // remember est. log. props of the child, those will be transplanted
+          // into the temp scan later
+          cseEstLogProps_ =
+            copyOfChildTree->getGroupAttr()->outputLogProp(
+                 (*GLOBAL_EMPTY_INPUT_LOGPROP));
+          // Get a preliminary bearing on how many times we are going
+          // to evaluate this CSE if it isn't shared. Note that this
+          // looks at the parent CSE's analysis outcome, and not all
+          // of these parents may be analyzed yet, so this may be an
+          // overestimate.
+          numPreliminaryRefs = info.getTotalNumRefs();
+
+          for (CollIndex k=0; k<tempTableColumns.entries(); k++)
+            if (childTreeKeyColumns.contains(tempTableColumns[k]))
+              info.addCSEKeyColumn(k);
+        }
+    }
 
+  if (canShare &&
+      CmpCommon::getDefault(CSE_USE_TEMP) != DF_ON)
+    {
+      // When CSE_USE_TEMP is set to SYSTEM, make a heuristic decision
+
+      // calculate some metrics for the temp table, based on row length,
+      // cardinality (or max. cardinality) and number of times it is used
+      Lng32 tempTableRowLength = tempTableColumns.getRowLength();
+      CostScalar cseTempTableSize = cseEstLogProps_->getResultCardinality() *
+        tempTableRowLength / numPreliminaryRefs;
+      CostScalar cseTempTableMaxSize = cseEstLogProps_->getMaxCardEst() *
+        tempTableRowLength / numPreliminaryRefs;
+      double maxTableSize =
+        ActiveSchemaDB()->getDefaults().getAsDouble(CSE_TEMP_TABLE_MAX_SIZE);
+      double maxTableSizeBasedOnMaxCard =
+        ActiveSchemaDB()->getDefaults().getAsDouble(CSE_TEMP_TABLE_MAX_MAX_SIZE);
+
+      // cumulative number of key columns referenced in consumers
+      Int32 totalKeyColPreds = 0;
+
+      // key cols that are referenced by a predicate in all consumers
+      ValueIdSet commonKeyCols(childTreeKeyColumns);
+
+      // check the total size of the temp table, divided by the number
+      // of times it is used
+      if (maxTableSize > 0 && cseTempTableSize > maxTableSize)
+        {
+          char buf[200];
+
+          snprintf(buf, sizeof(buf),
+                   "Temp table size %e exceeds limit %e",
+                   cseTempTableSize.getValue(),
+                   maxTableSize);
+          emitCSEDiagnostics(buf);
+          canShare = FALSE;
+        }
+      else if (maxTableSizeBasedOnMaxCard > 0 &&
+               cseTempTableMaxSize > maxTableSizeBasedOnMaxCard)
+        {
+          char buf[200];
+
+          snprintf(buf, sizeof(buf),
+                   "Temp table size %e (based on max card) exceeds limit %e",
+                   cseTempTableMaxSize.getValue(),
+                   maxTableSizeBasedOnMaxCard);
+          emitCSEDiagnostics(buf);
+          canShare = FALSE;
+        }
+
+      // determine which "key" columns are referenced by non-common
+      // predicates
+      for (CollIndex ncp=0; ncp<numConsumers; ncp++)
+        {
+          const ValueIdSet &nonCommonPreds(nonCommonPredicatesArray[ncp]);
+          ValueIdSet tempRefCols;
+
+          tempRefCols.accumulateReferencedValues(childTreeKeyColumns,
+                                                 nonCommonPreds);
+          totalKeyColPreds += tempRefCols.entries();
+          nonCommonPreds.weedOutUnreferenced(commonKeyCols);
+        }
+
+      // decide against materialization if the average number of "key"
+      // columns referenced in each consumer is greater than
+      // CSE_PCT_KEY_COL_PRED_CONTROL percent
+      if (totalKeyColPreds >
+          (numConsumers * childTreeKeyColumns.entries() *
+           ActiveSchemaDB()->getDefaults().getAsDouble(CSE_PCT_KEY_COL_PRED_CONTROL) / 100.0))
+        {
+          char buf[200];
+
+          snprintf(buf, sizeof(buf),
+                   "Number of potential key predicates in consumers (%d) exceeds limit %f",
+                   totalKeyColPreds,
+                   (numConsumers * childTreeKeyColumns.entries() *
+                    ActiveSchemaDB()->getDefaults().getAsDouble(CSE_PCT_KEY_COL_PRED_CONTROL) / 100.0));
+          
+          emitCSEDiagnostics(buf);
+          canShare = FALSE;
+        }
+
+      // decide against materialization if the number of key columns
+      // referenced by every consumer is > CSE_COMMON_KEY_PRED_CONTROL
+      if (commonKeyCols.entries() >
+          ActiveSchemaDB()->getDefaults().getAsLong(CSE_COMMON_KEY_PRED_CONTROL))
+        {
+          char buf[200];
+          snprintf(buf, sizeof(buf),
+                   "All consumers have a predicate on %d common key columns, limit is %d",
+                   commonKeyCols.entries(),
+                   ActiveSchemaDB()->getDefaults().getAsLong(CSE_COMMON_KEY_PRED_CONTROL));
+          emitCSEDiagnostics(buf);
+          canShare = FALSE;
+        }
     }
 
   if (canShare)
-    result = CSEInfo::CREATE_TEMP;
+    {
+      result = CSEInfo::CREATE_TEMP;
+      child(0) = copyOfChildTree;
+    }
   else if (result == CSEInfo::UNKNOWN_ANALYSIS)
     result = CSEInfo::EXPAND;
 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/OptLogRelExpr.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/OptLogRelExpr.cpp b/core/sql/optimizer/OptLogRelExpr.cpp
index 227cc08..dd89c4e 100644
--- a/core/sql/optimizer/OptLogRelExpr.cpp
+++ b/core/sql/optimizer/OptLogRelExpr.cpp
@@ -4772,10 +4772,26 @@ MapValueIds::synthEstLogProp(const EstLogPropSharedPtr& inputEstLogProp)
 
   // Synthesize estimated logical properties from my child's
 
+  const ColStatDescList *colStats =
+    &(child(0).outputLogProp(inputEstLogProp)->getColStats());
+  CostScalar baseCardinality =
+    child(0).outputLogProp(inputEstLogProp)->getResultCardinality();
+
+  if (cseRef_) // could also do this unconditionally
+    {
+      ColStatDescList *mappedStats = new(CmpCommon::statementHeap())
+        ColStatDescList(CmpCommon::statementHeap());
+ 
+      mappedStats->makeMappedDeepCopy(*colStats,
+                                      map_,
+                                      TRUE);
+      colStats = mappedStats;
+    }
+
   EstLogPropSharedPtr myEstLogProp =
     synthEstLogPropForUnaryLeafOp (inputEstLogProp,
-	    child(0).outputLogProp(inputEstLogProp)->getColStats(),
-	    child(0).outputLogProp(inputEstLogProp)->getResultCardinality());
+                                   *colStats,
+                                   baseCardinality);
 
   getGroupAttr()->addInputOutputLogProp (inputEstLogProp, myEstLogProp);
 
@@ -4836,14 +4852,61 @@ const Lng32 Scan::MAX_NUM_INDEX_JOINS = 1;
 void
 Scan::synthEstLogProp(const EstLogPropSharedPtr& inputEstLogProp)
 {
-  if (getGroupAttr()->isPropSynthesized(inputEstLogProp)) return;
+  if (getGroupAttr()->isPropSynthesized(inputEstLogProp))
+    return;
 
   // synthesize my estimated logical properties from the initial
-  // table statistics
+  // table statistics or the stats of a common subexpression
+  const ColStatDescList *colStats =
+    &getTableDesc()->getTableColStats();
+  CostScalar baseCardinality = getBaseCardinality();
+
+  if (commonSubExpr_)
+    {
+      ValueIdList tempTableCols;
+      ValueIdList tempTableVEGCols;
+      ValueIdList cseCols;
+      CSEInfo *info =
+        CmpCommon::statement()->getCSEInfo(commonSubExpr_->getName());
+      // The original tree is the child of the CommonSubExprRef that
+      // did the analysis of whether to use CSEs
+      CommonSubExprRef *analyzingCSERef =
+        info->getConsumer(info->getIdOfAnalyzingConsumer());
+
+      // this makes the ValueIdList of only those CSE columns that are
+      // actually used in the temp table
+      commonSubExpr_->makeValueIdListFromBitVector(
+           cseCols,
+           analyzingCSERef->getColumnList(),
+           info->getNeededColumns());
+
+      getTableDesc()->getUserColumnList(tempTableCols);
+      getTableDesc()->getEquivVEGCols(tempTableCols, tempTableVEGCols);
+      // make a ValueIdMap from the original tree for the common
+      // subexpression (bottom) to the columns of our temp table (top)
+      ValueIdMap cseToTempScanMap(tempTableVEGCols,
+                                  cseCols);
+
+      // get to the ColStatDesc of the common subexpression
+      const ColStatDescList &origCSEColStats(
+           analyzingCSERef->getEstLogProps()->colStats());
+
+      ColStatDescList *mappedStats = new(CmpCommon::statementHeap())
+        ColStatDescList(CmpCommon::statementHeap());
+
+      // now translate the col stats of the original CSE to the temp table
+      mappedStats->makeMappedDeepCopy(origCSEColStats,
+                                      cseToTempScanMap,
+                                      FALSE);
+
+      colStats = mappedStats;
+      baseCardinality = analyzingCSERef->getEstLogProps()->getResultCardinality();
+    }
+
   EstLogPropSharedPtr myEstLogProp =
-    synthEstLogPropForUnaryLeafOp (inputEstLogProp,
-			       getTableDesc()->getTableColStats(),
-			       getBaseCardinality());
+    synthEstLogPropForUnaryLeafOp(inputEstLogProp,
+                                  *colStats,
+                                  baseCardinality);
 
   QueryAnalysis *qa = QueryAnalysis::Instance();
 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/RelExeUtil.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/RelExeUtil.cpp b/core/sql/optimizer/RelExeUtil.cpp
index c568081..f7230a1 100644
--- a/core/sql/optimizer/RelExeUtil.cpp
+++ b/core/sql/optimizer/RelExeUtil.cpp
@@ -116,6 +116,21 @@ void castComputedColumnsToAnsiTypes(BindWA *bindWA,
 // Member functions for class GenericUtilExpr
 // -----------------------------------------------------------------------
 
+NABoolean GenericUtilExpr::duplicateMatch(const RelExpr & other) const
+{
+  if (NOT RelExpr::duplicateMatch(other))
+    return FALSE;
+
+  // a simplified version, should really check all fields
+  GenericUtilExpr &o = (GenericUtilExpr &) other;
+  if (NOT (stmtText_ == o.stmtText_ ||
+           stmtText_ && o.stmtText_ &&
+           (strcmp(stmtText_, o.stmtText_) == 0)))
+    return FALSE;
+
+  return TRUE;
+}
+
 RelExpr * GenericUtilExpr::copyTopNode(RelExpr *derivedNode, CollHeap* outHeap)
 {
   GenericUtilExpr *result;
@@ -297,6 +312,33 @@ NABoolean ExeUtilExpr::pilotAnalysis(QueryAnalysis* qa)
   return TRUE;
 }
 
+HashValue ExeUtilExpr::topHash()
+{
+  HashValue result = GenericUtilExpr::topHash();
+
+  result ^= tableId_;
+
+  return result;
+}
+
+NABoolean ExeUtilExpr::duplicateMatch(const RelExpr & other) const
+{
+  if (NOT GenericUtilExpr::duplicateMatch(other))
+    return FALSE;
+
+  // a simplified version, should really check all fields
+  ExeUtilExpr &o = (ExeUtilExpr &) other;
+  if (NOT (tableId_ == o.tableId_))
+    return FALSE;
+
+  // if tableDesc is not allocated, compare the names
+  if (tableId_ == NULL &&
+      NOT(tableName_ == o.tableName_))
+    return FALSE;
+
+  return TRUE;
+}
+
 RelExpr * ExeUtilExpr::copyTopNode(RelExpr *derivedNode, CollHeap* outHeap)
 {
   ExeUtilExpr *result;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/RelExeUtil.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/RelExeUtil.h b/core/sql/optimizer/RelExeUtil.h
index f786eeb..74d63c0 100644
--- a/core/sql/optimizer/RelExeUtil.h
+++ b/core/sql/optimizer/RelExeUtil.h
@@ -137,6 +137,7 @@ public:
 
   // get the degree of this node 
   virtual Int32 getArity() const { return 0; };
+  virtual NABoolean duplicateMatch(const RelExpr & other) const;
   virtual RelExpr * copyTopNode(RelExpr *derivedNode = NULL,
 				CollHeap* outHeap = 0);
 
@@ -597,6 +598,8 @@ public:
   ExeUtilExpr()
        : GenericUtilExpr(REL_EXE_UTIL) {};
 
+  virtual HashValue topHash();
+  virtual NABoolean duplicateMatch(const RelExpr & other) const;
   virtual RelExpr * copyTopNode(RelExpr *derivedNode = NULL,
 				CollHeap* outHeap = 0);
   virtual const NAString getText() const;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/2d415568/core/sql/optimizer/RelExpr.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/RelExpr.cpp b/core/sql/optimizer/RelExpr.cpp
index 74a20bf..ee8f776 100644
--- a/core/sql/optimizer/RelExpr.cpp
+++ b/core/sql/optimizer/RelExpr.cpp
@@ -287,6 +287,7 @@ RelExpr::RelExpr(OperatorTypeEnum otype,
   ,cachedTupleFormat_(ExpTupleDesc::UNINITIALIZED_FORMAT)
   ,cachedResizeCIFRecord_(FALSE)
   ,dopReduced_(FALSE)
+  ,originalExpr_(NULL)
 {
 
   child_[0] = leftChild;
@@ -854,6 +855,7 @@ RelExpr * RelExpr::copyTopNode(RelExpr *derivedNode,CollHeap* outHeap)
   result->sourceMemoExprId_ = sourceMemoExprId_;
   result->sourceGroupId_ = sourceGroupId_;
   result->costLimit_ = costLimit_;
+  result->originalExpr_ = this;
 
   return result;
 }
@@ -890,6 +892,32 @@ RelExpr * RelExpr::copyRelExprTree(CollHeap* outHeap)
   return result;
 }
 
+const RelExpr * RelExpr::getOriginalExpr(NABoolean transitive) const
+{
+  if (originalExpr_ == NULL)
+    return this;
+
+  RelExpr *result = originalExpr_;
+
+  while (result->originalExpr_ && transitive)
+    result = result->originalExpr_;
+
+  return result;
+}
+
+RelExpr * RelExpr::getOriginalExpr(NABoolean transitive)
+{
+  if (originalExpr_ == NULL)
+    return this;
+
+  RelExpr *result = originalExpr_;
+
+  while (result->originalExpr_ && transitive)
+    result = result->originalExpr_;
+
+  return result;
+}
+
 void RelExpr::setBlockStmtRecursively(NABoolean x)
 {
   setBlockStmt(x);
@@ -11988,16 +12016,73 @@ void MapValueIds::pushdownCoveredExpr(
           {
             VEG *veg =
               static_cast<VEGPredicate *>(g.getItemExpr())->getVEG();
-            ValueIdSet vegMembers(veg->getAllValues());
-
-            vegMembers += veg->getVEGReference()->getValueId();
-            vegMembers.intersectSet(
-                 getGroupAttr()->getCharacteristicOutputs());
-            if (!vegMembers.isEmpty())
-              // a VEGPred on one of my characteristic outputs,
-              // assume that my child tree already has the
-              // associated VEGPreds
-              predicatesOnParent -= g;
+            ValueId vegRef(veg->getVEGReference()->getValueId());
+
+            if (newExternalInputs.contains(vegRef))
+              {
+                // We are trying to push down a VEGPred and we are at
+                // the same time offering the VEGRef as a new
+                // characteristic input.  Example: We want to push
+                // VEGPred_2(a=b) down and we offer its corresponding
+                // VEGRef_1(a,b) as an input. The map in our
+                // MapValueIds node maps VEGRef_1(a,b) on the top to
+                // VEGRef_99(x,y) on the bottom. Note that either one
+                // of the VEGies might contain a constant that the
+                // other one doesn't contain. Note also that the
+                // MapValueIds node doesn't map characteristic inputs,
+                // those are passed unchanged to the child. So, we need
+                // to generate a predicate for the child that:
+                // a) represents the semantics of VEGPred_2(a,b)
+                // b) compares the new characteristic input
+                //    VEGRef_1(a,b) with some value in the child
+                // c) doesn't change the members of the existing
+                //    VEGies.
+                // The best solution is probably an equals predicate
+                // between the characteristic input and the VEGRef
+                // (or other expression) that is the child's equivalent.
+                // Example:  VEGRef_1(a,b) = VEGRef_99(x,y)
+                ValueId bottomVal;
+                ItemExpr *eqPred = NULL;
+
+                map_.mapValueIdDown(vegRef, bottomVal);
+
+                if (vegRef != bottomVal && bottomVal != NULL_VALUE_ID)
+                  {
+                    eqPred = new(CmpCommon::statementHeap()) BiRelat(
+                         ITM_EQUAL,
+                         vegRef.getItemExpr(),
+                         bottomVal.getItemExpr(),
+                         veg->getSpecialNulls());
+                    eqPred->synthTypeAndValueId();
+                    // replace g with the new equals predicate
+                    // when we do the actual rewrite below
+                    map_.addMapEntry(g, eqPred->getValueId());
+                  }
+              }
+            else
+              {
+                // Don't push down VEGPredicates on columns that are
+                // characteristic outputs of the MapValueIds. Those
+                // predicates (or their equivalents) should have
+                // already been pushed down.
+                ValueIdSet vegMembers(veg->getAllValues());
+
+                vegMembers += vegRef;
+                vegMembers.intersectSet(
+                     getGroupAttr()->getCharacteristicOutputs());
+                if (!vegMembers.isEmpty())
+                  {
+                    // a VEGPred on one of my characteristic outputs,
+
+                    // assume that my child tree already has the
+                    // associated VEGPreds and remove this predicate
+                    // silently
+                    predicatesOnParent -= g;
+                  }
+                // else leave the predicate and let the code below deal
+                // with it, this will probably end up in a failed assert
+                // below for predsRewrittenForChild.isEmpty()
+              }
           }
     }
 
@@ -12237,6 +12322,90 @@ void ControlRunningQuery::setComment(NAString &comment)
 // member functions for class CSEInfo (helper for CommonSubExprRef)
 // -----------------------------------------------------------------------
 
+Int32 CSEInfo::getTotalNumRefs(Int32 restrictToSingleConsumer) const
+{
+  // shortcut for main query
+  if (cseId_ == CmpStatement::getCSEIdForMainQuery())
+    return 1;
+
+  // Calculate how many times we will evaluate this common subexpression
+  // at runtime:
+  //  - If the CSE is shared then we evaluate it once
+  //  - Otherwise, look at the consumers that are lexical refs
+  //    (avoid double-counting refs from multiple copies of a parent)
+  //    and add the times they are being executed.
+  Int32 result = 0;
+  NABoolean sharedConsumers = FALSE;
+  LIST(CountedCSEInfo) countsByCSE(CmpCommon::statementHeap());
+  CollIndex minc = 0;
+  CollIndex maxc = consumers_.entries();
+
+  if (restrictToSingleConsumer >= 0)
+    {
+      // count only the executions resulting from a single consumer
+      minc = restrictToSingleConsumer;
+      maxc = minc + 1;
+    }
+
+  // loop over all consumers or look at just one
+  for (CollIndex c=minc; c<maxc; c++)
+    {
+      if (isShared(c))
+        {
+          sharedConsumers = TRUE;
+        }
+      else
+        {
+          CommonSubExprRef *consumer = getConsumer(c);
+          CSEInfo *parentInfo = CmpCommon::statement()->getCSEInfoById(
+               consumer->getParentCSEId());
+          NABoolean duplicateDueToSharedConsumer = FALSE;
+
+          // Don't double-count consumers that originate from the
+          // same parent CSE, have the same lexical ref number from
+          // the parent, and are shared.
+          if (parentInfo->isShared(
+                   consumer->getParentConsumerId()))
+            {
+              for (CollIndex d=0; d<countsByCSE.entries(); d++)
+                if (countsByCSE[d].getInfo() == parentInfo &&
+                    countsByCSE[d].getLexicalCount() ==
+                    consumer->getLexicalRefNumFromParent())
+                  {
+                    duplicateDueToSharedConsumer = TRUE;
+                    break;
+                  }
+
+              // First consumer from this parent CSE with this lexical
+              // ref number, remember that we are going to count
+              // it. Note that we are use the lexical ref "count" in
+              // CountedCSEInfo as the lexical ref number in this
+              // method, so the name "count" means "ref number" in
+              // this context.
+              if (!duplicateDueToSharedConsumer)
+                countsByCSE.insert(
+                     CountedCSEInfo(
+                          parentInfo,
+                          consumer->getLexicalRefNumFromParent()));
+            } // parent consumer is shared
+
+          if (!duplicateDueToSharedConsumer)
+            {
+              // recursively determine number of times the parent of
+              // this consumer gets executed
+              result += parentInfo->getTotalNumRefs(
+                   consumer->getParentConsumerId());
+            }
+        } // consumer is not shared
+    } // loop over consumer(s)
+
+  if (sharedConsumers)
+    // all the shared consumers are handled by evaluating the CSE once
+    result++;
+
+  return result;
+}
+
 CSEInfo::CSEAnalysisOutcome CSEInfo::getAnalysisOutcome(Int32 id) const
 {
   if (idOfAnalyzingConsumer_ != id &&
@@ -12248,10 +12417,37 @@ CSEInfo::CSEAnalysisOutcome CSEInfo::getAnalysisOutcome(Int32 id) const
     return analysisOutcome_;
 }
 
-void CSEInfo::addChildCSE(CSEInfo *child)
+Int32 CSEInfo::addChildCSE(CSEInfo *childInfo, NABoolean addLexicalRef)
 {
-  if (!childCSEs_.contains(child))
-    childCSEs_.insert(child);
+  Int32 result = -1;
+  CollIndex foundIndex = NULL_COLL_INDEX;
+
+  // look for an existing entry
+  for (CollIndex i=0;
+       i<childCSEs_.entries() && foundIndex == NULL_COLL_INDEX;
+       i++)
+    if (childCSEs_[i].getInfo() == childInfo)
+      foundIndex = i;
+
+  if (foundIndex == NULL_COLL_INDEX)
+    {
+      // create a new entry
+      foundIndex = childCSEs_.entries();
+      childCSEs_.insert(CountedCSEInfo(childInfo));
+    }           
+
+  if (addLexicalRef)
+    {
+      // The return value for a lexical ref is the count of lexical
+      // refs for this particular parent/child CSE relationship so far
+      // (0 if this is the first one). Note that we can't say anything
+      // abount counts for expanded refs at this time, those will be
+      // handled later, during the transform phase of the normalizer.
+      result = childCSEs_[foundIndex].getLexicalCount();
+      childCSEs_[foundIndex].incrementLexicalCount();
+    }
+
+  return result;
 }
 
 void CSEInfo::addCSERef(CommonSubExprRef *cse)
@@ -12259,12 +12455,38 @@ void CSEInfo::addCSERef(CommonSubExprRef *cse)
   CMPASSERT(name_ == cse->getName());
   cse->setId(consumers_.entries());
   consumers_.insert(cse);
+  if (cse->isALexicalRef())
+    numLexicalRefs_++;
+}
+
+void CSEInfo::replaceConsumerWithAnAlternative(CommonSubExprRef *c)
+{
+  Int32 idToReplace = c->getId();
+
+  if (consumers_[idToReplace] != c)
+    {
+      CollIndex foundPos = alternativeConsumers_.index(c);
+
+      CMPASSERT(foundPos != NULL_COLL_INDEX);
+      CMPASSERT(consumers_[idToReplace]->getOriginalRef() ==
+                c->getOriginalRef());
+      // c moves from the list of copies to the real list and another
+      // consumer moves the opposite way
+      alternativeConsumers_.removeAt(foundPos);
+      alternativeConsumers_.insert(consumers_[idToReplace]);
+      consumers_[idToReplace] = c;
+    }
 }
 
 // -----------------------------------------------------------------------
 // member functions for class CommonSubExprRef
 // -----------------------------------------------------------------------
 
+NABoolean CommonSubExprRef::isAChildOfTheMainQuery() const
+{
+  return (parentCSEId_ == CmpStatement::getCSEIdForMainQuery());
+}
+
 CommonSubExprRef::~CommonSubExprRef()
 {
 }
@@ -12275,13 +12497,19 @@ Int32 CommonSubExprRef::getArity() const
   return 1;
 }
 
-void CommonSubExprRef::addToCmpStatement()
+void CommonSubExprRef::addToCmpStatement(NABoolean lexicalRef)
 {
   NABoolean alreadySeen = TRUE;
 
   // look up whether a CSE with this name already exists
   CSEInfo *info = CmpCommon::statement()->getCSEInfo(internalName_);
 
+  // sanity check, make sure that the caller knows whether this is a
+  // lexical ref (generated with CommonSubExprRef constructor) or an
+  // expanded ref (generated with CommonSubExprRef::copyTopNode()
+  // before the bind phase)
+  CMPASSERT(isALexicalRef() == lexicalRef);
+
   if (!info)
     {
       // make a new object to hold a list of all references
@@ -12298,13 +12526,47 @@ void CommonSubExprRef::addToCmpStatement()
     CmpCommon::statement()->addCSEInfo(info);
 }
 
-NABoolean CommonSubExprRef::isFirstReference()
+void CommonSubExprRef::addParentRef(CommonSubExprRef *parentRef)
 {
-  return (
-       // this is the first reference added, or
-       id_ == 0 ||
-       // this is not yet added an no other reference has been added yet
-       id_ < 0 && CmpCommon::statement()->getCSEInfo(internalName_) == NULL);
+  // Establish the parent/child relationship between two
+  // CommonSubExprRef nodes, or between the main query and a
+  // CommonSubExprRef node (parentRef == NULL). Also, record
+  // the dependency between parent and child CSEs in the lexical
+  // dependency multigraph. Bookkeeping to do:
+  //
+  // - Add the child's CSE to the list of child CSEs of the
+  //   parent CSE
+  // - Set the data members in the child ref that point to the
+  //   parent CSE and parent ref
+
+  // parent info is for a parent CSE or for the main query
+  CSEInfo *parentInfo =
+    (parentRef ? CmpCommon::statement()->getCSEInfo(parentRef->getName())
+               : CmpCommon::statement()->getCSEInfoForMainQuery());
+  CSEInfo *childInfo =
+    CmpCommon::statement()->getCSEInfo(getName());
+
+  CMPASSERT(parentInfo && childInfo);
+
+  // Update the lexical CSE multigraph, and also return the
+  // count of previously existing edges in the graph.
+  // LexicalRefNumFromParent is set to a positive value only for
+  // lexical refs, the other refs will be handled later, in the SQO
+  // phase. This is since expanded refs may be processed before their
+  // lexical ref and we may not know this value yet.
+  lexicalRefNumFromParent_ =
+    parentInfo->addChildCSE(childInfo, isALexicalRef());
+
+  parentCSEId_ = parentInfo->getCSEId();
+  if (parentRef)
+    parentRefId_ = parentRef->getId();
+  else
+    parentRefId_ = -1;  // main query does not have a CommonSubExprRef
+}
+
+NABoolean CommonSubExprRef::isFirstReference() const
+{
+  return (CmpCommon::statement()->getCSEInfo(internalName_) == NULL);
 }
 
 void CommonSubExprRef::addLocalExpr(LIST(ExprNode *) &xlist,
@@ -12360,20 +12622,46 @@ RelExpr * CommonSubExprRef::copyTopNode(RelExpr *derivedNode,
   else
     result = static_cast<CommonSubExprRef *>(derivedNode);
 
+  // copy fields that are common for bound and unbound nodes
+  result->hbAccessOptionsFromCTE_ = hbAccessOptionsFromCTE_;
+
   if (nodeIsBound())
     {
       // if the node is bound, we assume that the copy is serving the same function
-      // as the original
+      // as the original, as an alternative, create an "alternative ref"
       result->setId(id_);
+
+      result->parentCSEId_ = parentCSEId_;
+      result->parentRefId_ = parentRefId_;
+      result->lexicalRefNumFromParent_ = lexicalRefNumFromParent_;
       result->columnList_ = columnList_;
+      result->nonVEGColumns_ = nonVEGColumns_;
+      result->commonInputs_ = commonInputs_;
       result->pushedPredicates_ = pushedPredicates_;
+      result->nonSharedPredicates_ = nonSharedPredicates_;
+      result->cseEstLogProps_ = cseEstLogProps_;
+      // don't copy the tempScan_
+
+      // Mark this as an alternative ref, not that this does not
+      // change the status of lexical vs. expanded ref
+      result->isAnExpansionOf_ = isAnExpansionOf_;
+      result->isAnAlternativeOf_ =
+        ( isAnAlternativeOf_ ? isAnAlternativeOf_ : this);
+
+      CmpCommon::statement()->getCSEInfo(internalName_)->
+        registerAnAlternativeConsumer(result);
     }
   else
-    // if the node is not bound, we assume that we created a new
-    // reference to a common subexpression, for example by referencing
-    // a CTE that itself contains another reference to a CTE (Common
-    // Table Expression)
-    result->addToCmpStatement();
+    {
+      // If the node is not bound, we assume that we created an
+      // "expanded" reference to a common subexpression in a tree that
+      // itself is a reference to a CTE (Common Table Expression).
+      // See the comment in RelMisc.h for method isAnExpandedRef()
+      // to explain the term "expanded" ref.
+      result->isAnExpansionOf_ =
+        (isAnExpansionOf_ ? isAnExpansionOf_ : this);
+      result->addToCmpStatement(FALSE);
+    }
 
   return result;
 }
@@ -12414,12 +12702,25 @@ Union * CommonSubExprRef::makeUnion(RelExpr *lc,
 
 void CommonSubExprRef::display()
 {
+  if (isAChildOfTheMainQuery())
+    printf("Parent: main query, lexical ref %d\n",
+           lexicalRefNumFromParent_);
+  else
+    printf("Parent: %s(consumer %d, lexical ref %d)\n",
+           CmpCommon::statement()->getCSEInfoById(
+                parentCSEId_)->getName().data(),
+           parentRefId_,
+           lexicalRefNumFromParent_);
   printf("Original columns:\n");
   columnList_.display();
   printf("\nCommon inputs:\n");
   commonInputs_.display();
   printf("\nPushed predicates:\n");
   pushedPredicates_.display();
+  printf("\nNon shared preds to be applied to scan:\n");
+  nonSharedPredicates_.display();
+  printf("\nPotential values for VEG rewrite:\n");
+  nonVEGColumns_.display();
 }
 
 void CommonSubExprRef::displayAll(const char *optionalId)
@@ -12434,16 +12735,30 @@ void CommonSubExprRef::displayAll(const char *optionalId)
         {
           CSEInfo *info = cses->at(i);
           CollIndex nc = info->getNumConsumers();
+          NABoolean isMainQuery =
+            (info->getCSEId() == CmpStatement::getCSEIdForMainQuery());
 
-          printf("==========================\n");
-          printf("CSE: %s (%d consumers)\n",
-                 info->getName().data(),
-                 nc);
+          if (isMainQuery)
+            {
+              printf("\n\n==========================\n");
+              printf("MainQuery:\n");
+            }
+          else
+            {
+              printf("\n\n==========================\n");
+              printf("CSE: %s (%d consumers, %d lexical ref(s), %d total execution(s))\n",
+                     info->getName().data(),
+                     nc,
+                     info->getNumLexicalRefs(),
+                     info->getTotalNumRefs());
+            }
 
-          const LIST(CSEInfo *) &children(info->getChildCSEs());
+          const LIST(CountedCSEInfo) &children(info->getChildCSEs());
 
           for (CollIndex j=0; j<children.entries(); j++)
-            printf("       references CSE: %s\n", children[j]->getName().data());
+            printf("       references CSE: %s %d times\n",
+                   children[j].getInfo()->getName().data(),
+                   children[j].getLexicalCount());
 
           if (info->getIdOfAnalyzingConsumer() >= 0)
             {
@@ -12479,36 +12794,50 @@ void CommonSubExprRef::displayAll(const char *optionalId)
                      info->getIdOfAnalyzingConsumer(),
                      outcome);
 
-              makeValueIdListFromBitVector(cols,
-                                           cCols,
-                                           info->getNeededColumns());
-              printf("  \ncolumns of temp table:\n");
+              makeValueIdListFromBitVector(
+                   cols,
+                   cCols,
+                   info->getNeededColumns());
+              printf("\n  columns of temp table:\n");
               cols.display();
-              printf("  \ncommonPredicates:\n");
+              printf("\n  commonPredicates:\n");
               info->getCommonPredicates().display();
               if (info->getVEGRefsWithDifferingConstants().entries() > 0)
                 {
-                  printf("  \nvegRefsWithDifferingConstants:\n");
+                  printf("\n  vegRefsWithDifferingConstants:\n");
                   info->getVEGRefsWithDifferingConstants().display();
                 }
               if (info->getVEGRefsWithDifferingInputs().entries() > 0)
                 {
-                  printf("  \nvegRefsWithDifferingInputs:\n");
+                  printf("\n  vegRefsWithDifferingInputs:\n");
                   info->getVEGRefsWithDifferingInputs().display();
                 }
-              printf("  \ntempTableName: %s\n",
-                     info->getTempTableName().
-                     getQualifiedNameAsAnsiString().data());
-              printf("  \nDDL of temp table:\n%s\n",
+              if (info->getCSETreeKeyColumns().entries() > 0)
+                {
+                  ValueIdList keyCols;
+
+                  makeValueIdListFromBitVector(
+                       keyCols,
+                       cCols,
+                       info->getCSETreeKeyColumns());
+                  printf("\n  CSE key columns:\n");
+                  keyCols.display();
+                }
+              printf("\n  DDL of temp table:\n%s\n",
                      info->getTempTableDDL().data());
-            }
+            } // analyzed
+          else if (info->getAnalysisOutcome(0) ==
+                   CSEInfo::ELIMINATED_IN_BINDER)
+            printf("  eliminated in the binder\n");
+          else if (!isMainQuery)
+            printf("  not yet analyzed\n");
 
           for (int c=0; c<nc; c++)
             {
-              printf("\n----- Consumer %d:\n", c);
+              printf("\n\n----- Consumer %d:\n", c);
               info->getConsumer(c)->display();
             }
-        }
+        } // a CSE we want to display
 }
 
 void CommonSubExprRef::makeValueIdListFromBitVector(ValueIdList &tgt,



[3/3] incubator-trafodion git commit: Merge [TRAFODION-2315] PR-928 Heuristic decision for common subexpressions

Posted by su...@apache.org.
Merge [TRAFODION-2315] PR-928 Heuristic decision for common subexpressions


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/222b6b76
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/222b6b76
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/222b6b76

Branch: refs/heads/master
Commit: 222b6b76d40149ef455200a0c3bbcf2f489e76d6
Parents: b6478e5 2d41556
Author: Suresh Subbiah <su...@apache.org>
Authored: Fri Jan 27 20:16:18 2017 +0000
Committer: Suresh Subbiah <su...@apache.org>
Committed: Fri Jan 27 20:16:18 2017 +0000

----------------------------------------------------------------------
 core/sql/arkcmp/CmpStatement.cpp         |  46 ++-
 core/sql/arkcmp/CmpStatement.h           |   7 +-
 core/sql/bin/SqlciErrors.txt             |   2 +-
 core/sql/comexe/ComTdbHdfsScan.h         |   2 +-
 core/sql/executor/ExHdfsScan.cpp         | 110 +++---
 core/sql/executor/ExHdfsScan.h           |  11 +-
 core/sql/exp/ExpLOBinterface.h           |   3 +
 core/sql/generator/GenPreCode.cpp        |   5 +
 core/sql/optimizer/BindRelExpr.cpp       |  18 +-
 core/sql/optimizer/ColStatDesc.cpp       |  96 +++++
 core/sql/optimizer/ColStatDesc.h         |  25 +-
 core/sql/optimizer/NormRelExpr.cpp       | 518 ++++++++++++++++++--------
 core/sql/optimizer/OptLogRelExpr.cpp     |  77 +++-
 core/sql/optimizer/RelExeUtil.cpp        |  42 +++
 core/sql/optimizer/RelExeUtil.h          |   3 +
 core/sql/optimizer/RelExpr.cpp           | 421 ++++++++++++++++++---
 core/sql/optimizer/RelExpr.h             |  16 +-
 core/sql/optimizer/RelGrby.h             |   5 +-
 core/sql/optimizer/RelJoin.h             |   5 +-
 core/sql/optimizer/RelMisc.h             | 188 +++++++++-
 core/sql/optimizer/RelScan.h             |   5 +-
 core/sql/optimizer/RelSet.h              |   5 +-
 core/sql/optimizer/VEGTable.cpp          |  29 +-
 core/sql/optimizer/ValueDesc.cpp         |   2 +-
 core/sql/optimizer/ValueDesc.h           |   2 +-
 core/sql/parser/sqlparser.y              |   4 +-
 core/sql/regress/compGeneral/EXPECTED045 |  61 ++-
 core/sql/regress/compGeneral/TEST045     |  11 +
 core/sql/sqlcomp/DefaultConstants.h      |   5 +
 core/sql/sqlcomp/nadefaults.cpp          |   9 +-
 30 files changed, 1385 insertions(+), 348 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/222b6b76/core/sql/bin/SqlciErrors.txt
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/222b6b76/core/sql/executor/ExHdfsScan.cpp
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/222b6b76/core/sql/generator/GenPreCode.cpp
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/222b6b76/core/sql/optimizer/BindRelExpr.cpp
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/222b6b76/core/sql/optimizer/NormRelExpr.cpp
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/222b6b76/core/sql/optimizer/RelExeUtil.cpp
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/222b6b76/core/sql/optimizer/RelExpr.cpp
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/222b6b76/core/sql/optimizer/RelJoin.h
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/222b6b76/core/sql/regress/compGeneral/EXPECTED045
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/222b6b76/core/sql/sqlcomp/DefaultConstants.h
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/222b6b76/core/sql/sqlcomp/nadefaults.cpp
----------------------------------------------------------------------