You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by su...@apache.org on 2015/12/01 17:13:02 UTC
[1/2] incubator-trafodion git commit: [TRAFODION-1641] Fix MDAM costing bugs
Repository: incubator-trafodion
Updated Branches:
refs/heads/master 05f4d35a4 -> 7072c24fa
[TRAFODION-1641] Fix MDAM costing bugs
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/3415443d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/3415443d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/3415443d
Branch: refs/heads/master
Commit: 3415443dee6e4e1074c22342222e683c21ff31eb
Parents: e54c732
Author: Dave Birdsall <db...@apache.org>
Authored: Tue Nov 24 19:55:38 2015 +0000
Committer: Dave Birdsall <db...@apache.org>
Committed: Tue Nov 24 19:55:38 2015 +0000
----------------------------------------------------------------------
core/sql/optimizer/ScanOptimizer.cpp | 59 +++++++++++++++++++++++++------
core/sql/regress/seabase/EXPECTED010 | 2 +-
2 files changed, 49 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/3415443d/core/sql/optimizer/ScanOptimizer.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/ScanOptimizer.cpp b/core/sql/optimizer/ScanOptimizer.cpp
index 65ec12a..f18a352 100644
--- a/core/sql/optimizer/ScanOptimizer.cpp
+++ b/core/sql/optimizer/ScanOptimizer.cpp
@@ -553,11 +553,22 @@ private:
// Estimated rows iff multiple probes
CostScalar multiProbesDataRows_;
// >>>>>>>>>>>>>>>>> Current prefix related members <<<<<<<<<<<<<<<<<
- // # of subsets of each effective probe
+ // # of subsets of each effective probe at the current level
CostScalar prefixSubsets_;
+ // cumulative # of subsets of each effective probe
+ // Why do we care? MDAM is a recursive algorithm. It first materializes
+ // values for the first key column. For each of those, it materializes
+ // values for the second key column. And so on. Each of these levels adds
+ // progressively more cost which we must take into account. If we look
+ // only at prefixSubsets_ (that is, the current column level), we may be
+ // misled into thinking that adding more levels of column traversal is
+ // free, which it is not. Moreover, as the number of rows approaches the
+ // total number of rows in the table, it is akin to adding an additional
+ // table scan.
+ CostScalar cumulativePrefixSubsets_;
// # of subset seeks of each effective probe
CostScalar prefixSubsetsAsSeeks_;
- // # of rows of all probes.
+ // # of rows of all probes at the current column level
CostScalar prefixRows_;
// # of seeks of all probes.
CostScalar prefixRqsts_;
@@ -8797,6 +8808,7 @@ MDAMOptimalDisjunctPrefixWA::MDAMOptimalDisjunctPrefixWA
,lastColumnPosition_(optimizer.computeLastKeyColumnOfDisjunct(keyPredsByCol))
,firstColOverlaps_(FALSE)
,prefixSubsets_(csOne) // MDAM subsets
+ ,cumulativePrefixSubsets_(csZero)
,prefixSubsetsAsSeeks_(csOne) // MDAM subsets for all probes
,prefixRows_(0)
,prefixRqsts_(csOne)
@@ -9537,10 +9549,13 @@ void MDAMOptimalDisjunctPrefixWA::updateMinPrefix()
CostScalar seqKBytesPerScan;
Cost *scmCost = NULL;
+ cumulativePrefixSubsets_ += prefixSubsets_;
+
MDAM_DEBUG2(MTL2, "Disjunct: %d, Prefix Column: %d", disjunctIndex_, prefixColumnPosition_);
MDAM_DEBUG1(MTL2, "Incoming Probes: %f:", incomingProbes_.value());
MDAM_DEBUG1(MTL2, "Disjunct Failed Probes: %f:", failedProbes_.value());
MDAM_DEBUG1(MTL2, "Prefix Subsets: %f:", prefixSubsets_.value());
+ MDAM_DEBUG1(MTL2, "Cumulative Prefix Subsets: %f:", cumulativePrefixSubsets_.value());
MDAM_DEBUG1(MTL2, "Prefix Requests (probes * Subsets): %f:", prefixRqsts_.value());
MDAM_DEBUG1(MTL2, "Prefix Rows: %f:", prefixRows_.value());
MDAM_DEBUG1(MTL2, "Prefix Seeks %f:", prefixSeeks_.value());
@@ -9555,7 +9570,10 @@ void MDAMOptimalDisjunctPrefixWA::updateMinPrefix()
CostScalar rowSizeFactor = optimizer_.scmRowSizeFactor(rowSize);
CostScalar outputRowSizeFactor = optimizer_.scmRowSizeFactor(outputRowSize);
- CostScalar scmPrefixRows = prefixRows_ * rowSizeFactor;
+ // adding cumulativePrefixSubsets_ represents the row handling costs of the probes of
+ // the MDAM algorithm as it traverses over key columns; the algorithm is recursive
+ // and thus has cumulative costs
+ CostScalar scmPrefixRows = (prefixRows_ + cumulativePrefixSubsets_) * rowSizeFactor;
CostScalar scmPrefixOutputRows = prefixRows_ * outputRowSizeFactor;
CostScalar rowSizeFactorSeqIO = optimizer_.scmRowSizeFactor(rowSize,
@@ -9681,6 +9699,19 @@ void MDAMOptimalDisjunctPrefixWA::updateMinPrefix()
// This is a heuristic in that we unconditionally include the last key column
// with IN list (OR preds) predicate without going through the cost comparison
// step.
+ //
+ // Updated comments: The commentary above is incorrect but I don't know quite
+ // what to do with it yet. MDAM at run time is a recursive algorithm. In the
+ // example above, it will materialize values in the A column, and for each one,
+ // do a subset access on the second column. So the cost is a sum of the
+ // materialization cost on the first column and the subset access on the second.
+ // If there is a third key column C with no predicates on it, it would be
+ // inefficient to go MDAM to the last column position; rather it would be better
+ // to use B as the stop column. That is, do subsets on each distinct value of (A,B),
+ // rather than do subsets on each distinct (A,B,C). The larger the UEC of C, the
+ // more gross the inefficiency. Unfortunately, the code below will cause us to
+ // go MDAM to column C. In reference to the comments above, we need to devise
+ // a better way to estimate cost in the presence of RangeSpecs.
if ( (CmpCommon::getDefault(RANGESPEC_TRANSFORMATION) == DF_ON ) &&
optimizer_.getDisjuncts().containsOrPredsInRanges() &&
prefixColumnPosition_ == (lastColumnPosition_ - 1)
@@ -9728,14 +9759,20 @@ void MDAMOptimalDisjunctPrefixWA::updateMinPrefix()
optSeeks_ = prefixSeeks_;
optSeqKBRead_ = prefixKBRead_;
optKeyPreds_.insert(prefixKeyPreds_); // is a copy more efficient?
- // changing for stopColumn_ logic to flow throgh.
- // Now it will consider all the coulmns present in the disjunct.
- if (CmpCommon::getDefault(RANGESPEC_TRANSFORMATION) == DF_ON)
- // && !missingKeyColumnExists())
- //&& mdamForced_)
- stopColumn_ = lastColumnPosition_ - 1;
- else
- stopColumn_ = prefixColumnPosition_;
+
+ // Note: Formerly there was code here that would set stopColumn_
+ // to the last column position if the CQD RANGESPEC_TRANSFORMATION
+ // was on. This is incorrect; it would cause us to use MDAM to
+ // traverse through all columns always, even though it may be
+ // grossly inefficient to do so. (See commentary earlier in this
+ // method.) As it stands now, so long as there are no RangeSpec
+ // key predicates, this code will correctly pick the stop column.
+ // If there are RangeSpec predicates, code earlier in this method
+ // may cause us to only consider MDAM traversing on all columns.
+ // We can improve this later by improving how RangeSpec predicates
+ // are costed for MDAM.
+ stopColumn_ = prefixColumnPosition_;
+
prevColChosen_ = TRUE;
delete pMinCost_;
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/3415443d/core/sql/regress/seabase/EXPECTED010
----------------------------------------------------------------------
diff --git a/core/sql/regress/seabase/EXPECTED010 b/core/sql/regress/seabase/EXPECTED010
index abb0d70..b4cbf66 100644
--- a/core/sql/regress/seabase/EXPECTED010
+++ b/core/sql/regress/seabase/EXPECTED010
@@ -10097,7 +10097,7 @@ LC RC OP OPERATOR OPT DESCRIPTION CARD
---- ---- ---- -------------------- -------- -------------------- ---------
1 . 2 root 1.00E+009
-. . 1 trafodion_index_scan T010IX1 1.00E+009
+. . 1 trafodion_scan T010T4 1.00E+009
--- SQL operation complete.
>>execute s;
[2/2] incubator-trafodion git commit: Merge remote branch 'origin/pr/186/head' into mrg_86
Posted by su...@apache.org.
Merge remote branch 'origin/pr/186/head' into mrg_86
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/7072c24f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/7072c24f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/7072c24f
Branch: refs/heads/master
Commit: 7072c24fa9c92790d4af5ebf3b3466cc461d5b56
Parents: 05f4d35 3415443
Author: Suresh Subbiah <su...@apache.org>
Authored: Tue Dec 1 16:12:41 2015 +0000
Committer: Suresh Subbiah <su...@apache.org>
Committed: Tue Dec 1 16:12:41 2015 +0000
----------------------------------------------------------------------
core/sql/optimizer/ScanOptimizer.cpp | 59 +++++++++++++++++++++++++------
core/sql/regress/seabase/EXPECTED010 | 2 +-
2 files changed, 49 insertions(+), 12 deletions(-)
----------------------------------------------------------------------