You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@trafodion.apache.org by su...@apache.org on 2015/12/01 17:13:02 UTC

[1/2] incubator-trafodion git commit: [TRAFODION-1641] Fix MDAM costing bugs

Repository: incubator-trafodion
Updated Branches:
  refs/heads/master 05f4d35a4 -> 7072c24fa


[TRAFODION-1641] Fix MDAM costing bugs


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/3415443d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/3415443d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/3415443d

Branch: refs/heads/master
Commit: 3415443dee6e4e1074c22342222e683c21ff31eb
Parents: e54c732
Author: Dave Birdsall <db...@apache.org>
Authored: Tue Nov 24 19:55:38 2015 +0000
Committer: Dave Birdsall <db...@apache.org>
Committed: Tue Nov 24 19:55:38 2015 +0000

----------------------------------------------------------------------
 core/sql/optimizer/ScanOptimizer.cpp | 59 +++++++++++++++++++++++++------
 core/sql/regress/seabase/EXPECTED010 |  2 +-
 2 files changed, 49 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/3415443d/core/sql/optimizer/ScanOptimizer.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/ScanOptimizer.cpp b/core/sql/optimizer/ScanOptimizer.cpp
index 65ec12a..f18a352 100644
--- a/core/sql/optimizer/ScanOptimizer.cpp
+++ b/core/sql/optimizer/ScanOptimizer.cpp
@@ -553,11 +553,22 @@ private:
   // Estimated rows iff multiple probes
   CostScalar multiProbesDataRows_;
   // >>>>>>>>>>>>>>>>> Current prefix related members <<<<<<<<<<<<<<<<<
-  // # of subsets of each effective probe
+  // # of subsets of each effective probe at the current level
   CostScalar prefixSubsets_;
+  // cumulative # of subsets of each effective probe
+  // Why do we care? MDAM is a recursive algorithm. It first materializes
+  // values for the first key column. For each of those, it materializes 
+  // values for the second key column. And so on. Each of these levels adds
+  // progressively more cost which we must take into account. If we look
+  // only at prefixSubsets_ (that is, the current column level), we may be
+  // misled into thinking that adding more levels of column traversal is
+  // free. Which it is not. Moreover, as the number of rows approaches the
+  // total number of rows in the table, it is akin to adding an additional
+  // table scan.
+  CostScalar cumulativePrefixSubsets_;
   // # of subset seeks of each effective probe
   CostScalar prefixSubsetsAsSeeks_;
-  // # of rows of all probes.
+  // # of rows of all probes at the current column level
   CostScalar prefixRows_;
   // # of seeks of all probes.
   CostScalar prefixRqsts_;
@@ -8797,6 +8808,7 @@ MDAMOptimalDisjunctPrefixWA::MDAMOptimalDisjunctPrefixWA
   ,lastColumnPosition_(optimizer.computeLastKeyColumnOfDisjunct(keyPredsByCol))
   ,firstColOverlaps_(FALSE)
   ,prefixSubsets_(csOne) // MDAM subsets
+  ,cumulativePrefixSubsets_(csZero)
   ,prefixSubsetsAsSeeks_(csOne) // MDAM subsets for all probes
   ,prefixRows_(0)
   ,prefixRqsts_(csOne)
@@ -9537,10 +9549,13 @@ void MDAMOptimalDisjunctPrefixWA::updateMinPrefix()
   CostScalar seqKBytesPerScan;
   Cost *scmCost = NULL;
 
+  cumulativePrefixSubsets_ += prefixSubsets_;
+
   MDAM_DEBUG2(MTL2, "Disjunct: %d, Prefix Column: %d", disjunctIndex_, prefixColumnPosition_);
   MDAM_DEBUG1(MTL2, "Incoming Probes: %f:", incomingProbes_.value());
   MDAM_DEBUG1(MTL2, "Disjunct Failed Probes: %f:", failedProbes_.value());
   MDAM_DEBUG1(MTL2, "Prefix Subsets: %f:", prefixSubsets_.value());
+  MDAM_DEBUG1(MTL2, "Cumulative Prefix Subsets: %f:", cumulativePrefixSubsets_.value());
   MDAM_DEBUG1(MTL2, "Prefix Requests (probes * Subsets): %f:", prefixRqsts_.value());
   MDAM_DEBUG1(MTL2, "Prefix Rows: %f:", prefixRows_.value());
   MDAM_DEBUG1(MTL2, "Prefix Seeks %f:", prefixSeeks_.value());
@@ -9555,7 +9570,10 @@ void MDAMOptimalDisjunctPrefixWA::updateMinPrefix()
     CostScalar rowSizeFactor = optimizer_.scmRowSizeFactor(rowSize);
     CostScalar outputRowSizeFactor = optimizer_.scmRowSizeFactor(outputRowSize);
 
-    CostScalar scmPrefixRows = prefixRows_ * rowSizeFactor;
+    // adding cumulativePrefixSubsets_ represents the row handling costs of the probes of
+    // the MDAM algorithm as it traverses over key columns; the algorithm is recursive
+    // and thus has cumulative costs
+    CostScalar scmPrefixRows = (prefixRows_ + cumulativePrefixSubsets_) * rowSizeFactor;
     CostScalar scmPrefixOutputRows = prefixRows_ * outputRowSizeFactor;
 
     CostScalar rowSizeFactorSeqIO = optimizer_.scmRowSizeFactor(rowSize, 
@@ -9681,6 +9699,19 @@ void MDAMOptimalDisjunctPrefixWA::updateMinPrefix()
         //  This is a heuristics in that we unconditionally include the last key column 
         //  with IN list (OR preds) predicate without going through the cost comparison 
         //  step.
+        //
+        // Updated comments: The commentary above is incorrect but I don't know quite
+        // what to do with it yet. MDAM at run time is a recursive algorithm. In the 
+        // example above, it will materialize values in the A column, and for each one,
+        // do a subset access on the second column. So the cost is a sum of the
+        // materialization cost on the first column and the subset access on the second.
+        // If there is a third key column C with no predicates on it, it would be 
+        // inefficient to go MDAM to the last column position; rather it would be better
+        // to use B as the stop column. That is, do subsets on each distinct value of (A,B),
+        // rather than do subsets on each distinct (A,B,C). The larger the UEC of C, the
+        // more gross the inefficiency. Unfortunately, the code below will cause us to 
+        // go MDAM to column C. In reference to the comments above, we need to devise
+        // a better way to estimate cost in the presence of RangeSpecs.
         if ( (CmpCommon::getDefault(RANGESPEC_TRANSFORMATION) == DF_ON ) &&
               optimizer_.getDisjuncts().containsOrPredsInRanges() &&
               prefixColumnPosition_ == (lastColumnPosition_ - 1) 
@@ -9728,14 +9759,20 @@ void MDAMOptimalDisjunctPrefixWA::updateMinPrefix()
       optSeeks_ = prefixSeeks_;
       optSeqKBRead_ = prefixKBRead_;
       optKeyPreds_.insert(prefixKeyPreds_); // is a copy more efficient?
-      // changing for stopColumn_ logic to flow throgh.
-      // Now it will consider all the coulmns present in the disjunct.
-      if (CmpCommon::getDefault(RANGESPEC_TRANSFORMATION) == DF_ON)
-	// && !missingKeyColumnExists())
-	//&& mdamForced_)
-	stopColumn_ = lastColumnPosition_ - 1;
-      else
-	stopColumn_ = prefixColumnPosition_;
+
+      // Note: Formerly there was code here that would set stopColumn_
+      // to the last column position if the CQD RANGESPEC_TRANSFORMATION
+      // was on. This is incorrect; it would cause us to use MDAM to 
+      // traverse through all columns always, even though it may be
+      // grossly inefficient to do so. (See commentary earlier in this
+      // method.) As it stands now, so long as there are no RangeSpec
+      // key predicates, this code will correctly pick the stop column.
+      // If there are RangeSpec predicates, code earlier in this method
+      // may cause us to only consider MDAM traversing on all columns.
+      // We can improve this later by improving how RangeSpec predicates
+      // are costed for MDAM.
+      stopColumn_ = prefixColumnPosition_;
+
       prevColChosen_ = TRUE;
 
       delete pMinCost_;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/3415443d/core/sql/regress/seabase/EXPECTED010
----------------------------------------------------------------------
diff --git a/core/sql/regress/seabase/EXPECTED010 b/core/sql/regress/seabase/EXPECTED010
index abb0d70..b4cbf66 100644
--- a/core/sql/regress/seabase/EXPECTED010
+++ b/core/sql/regress/seabase/EXPECTED010
@@ -10097,7 +10097,7 @@ LC   RC   OP   OPERATOR              OPT       DESCRIPTION           CARD
 ---- ---- ---- --------------------  --------  --------------------  ---------
 
 1    .    2    root                                                  1.00E+009
-.    .    1    trafodion_index_scan            T010IX1               1.00E+009
+.    .    1    trafodion_scan                  T010T4                1.00E+009
 
 --- SQL operation complete.
 >>execute s;

[2/2] incubator-trafodion git commit: Merge remote branch 'origin/pr/186/head' into mrg_86

Posted by su...@apache.org.

Merge remote branch 'origin/pr/186/head' into mrg_86


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/7072c24f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/7072c24f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/7072c24f

Branch: refs/heads/master
Commit: 7072c24fa9c92790d4af5ebf3b3466cc461d5b56
Parents: 05f4d35 3415443
Author: Suresh Subbiah <su...@apache.org>
Authored: Tue Dec 1 16:12:41 2015 +0000
Committer: Suresh Subbiah <su...@apache.org>
Committed: Tue Dec 1 16:12:41 2015 +0000

----------------------------------------------------------------------
 core/sql/optimizer/ScanOptimizer.cpp | 59 +++++++++++++++++++++++++------
 core/sql/regress/seabase/EXPECTED010 |  2 +-
 2 files changed, 49 insertions(+), 12 deletions(-)
----------------------------------------------------------------------