You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by db...@apache.org on 2018/02/15 17:45:16 UTC

[1/2] trafodion git commit: [TRAFODION-2645] Fixes to MDAM costing code found during inner join testing

Repository: trafodion
Updated Branches:
  refs/heads/master bd886c121 -> 087af70db


[TRAFODION-2645] Fixes to MDAM costing code found during inner join testing


Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/c008ebe5
Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/c008ebe5
Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/c008ebe5

Branch: refs/heads/master
Commit: c008ebe519f51e5344b650a9c88e66e973320084
Parents: 449572d
Author: Dave Birdsall <db...@apache.org>
Authored: Wed Feb 14 00:42:41 2018 +0000
Committer: Dave Birdsall <db...@apache.org>
Committed: Wed Feb 14 00:42:41 2018 +0000

----------------------------------------------------------------------
 core/sql/optimizer/ScanOptimizer.cpp | 85 ++++++++++++++++++-------------
 1 file changed, 51 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/trafodion/blob/c008ebe5/core/sql/optimizer/ScanOptimizer.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/ScanOptimizer.cpp b/core/sql/optimizer/ScanOptimizer.cpp
index 9265a55..d15f003 100644
--- a/core/sql/optimizer/ScanOptimizer.cpp
+++ b/core/sql/optimizer/ScanOptimizer.cpp
@@ -585,7 +585,7 @@ public:
   const CostScalar & getOptMDAMProbeSubsets() const;
   const ValueIdSet & getOptKeyPreds() const;
 
-  void compute(NABoolean & noExePreds /* out */);
+  void compute(NABoolean & noExePreds /* out */,CostScalar & incomingScanProbes);
 
   CollIndex getStopColumn() const;
 
@@ -10215,7 +10215,7 @@ void NewMDAMCostWA::compute()
   }
   else 
   {
-    incomingScanProbes_ = 1; // TODO: why is this necessary?
+    incomingScanProbes_ = 1;
     MDAM_DEBUG0(MTL2, "Mdam scan is a single probe");
   }
 
@@ -10282,46 +10282,52 @@ void NewMDAMCostWA::compute()
     CostScalar refinedSeqBlocksReadUpperBound = MINOF(seqBlocksReadUpperBound,
       seqBlocksFetchUpperBound + seqBlocksProbeUpperBound);
 
+    // There are two kinds of overhead that we model as part of "seeks".
+    // One kind is actual disk seeks. There may be one per subset, but if
+    // subsets are near each other (as quite often happens between 
+    // successive levels of probes and then fetches), there won't be a seek.
+    // The other kind is message interactions with the HBase RegionServer.
+    // There is one of these per subset.
+
+    // If the upper bound of the number of sequential blocks is the same as
+    // the single subset upper bound, seqBlocksReadUpperBound, we'll assume
+    // no seeks. (Why none instead of one? Single subset uses none, so we in
+    // effect subtract one to make an apples-to-apples comparison.) If the
+    // upper bound of the number of sequential blocks is close to the single
+    // subset bound, we'll use the difference as an upper bound on the number 
+    // of seeks.
+ 
     CostScalar seeks = csZero;
+    CostScalar totalSubsets = sumMDAMProbeSubsets + sumMDAMFetchSubsets - csOne;
     if (refinedSeqBlocksReadUpperBound < seqBlocksReadUpperBound)
       {
-        // Here we know that in the worst case, we won't touch all the blocks 
-        // that a single subset scan will. Let's assume worst case placement
-        // of the blocks we do touch -- that is, spread them out as much
-        // as possible. The number of gaps is the number of seeks.
-        CostScalar maxNumberOfChunks = MINOF(refinedSeqBlocksReadUpperBound,
-          seqBlocksReadUpperBound - refinedSeqBlocksReadUpperBound);
-
-        // We don't count a seek for the first block. (Single subset scan
-        // costing doesn't count it either. So this makes it apples-to-apples.)
-        seeks = maxNumberOfChunks - csOne;
+        CostScalar difference = 
+          seqBlocksReadUpperBound - refinedSeqBlocksReadUpperBound;
+        seeks = MINOF(difference,totalSubsets);
       }
 
-    // Add into the seeks some overhead per subset. Not every subset will cause
-    // a seek, but there is some overhead per subset. (We subtract one, because
-    // simple scans don't charge for their one subset. That makes the comparison
-    // more applies-to-apples.)
-
-    CostScalar totalSubsets = sumMDAMProbeSubsets + sumMDAMFetchSubsets - csOne;
+    // Add into the seeks some overhead per subset. In our experience, subsets
+    // are more expensive than disk seeks, so they get multiplied by a
+    // subset factor.
+   
     CostScalar MDAMSubsetAdjustmentFactor = ActiveSchemaDB()->getDefaults().getAsDouble(MDAM_SUBSET_FACTOR);
     totalSubsets *= MDAMSubsetAdjustmentFactor;
     seeks += totalSubsets;
 
-    CostScalar rowSize = optimizer_.getIndexDesc()->getRecordSizeInKb() * csOneKiloBytes;
-    CostScalar rowSizeFactor = optimizer_.scmRowSizeFactor(rowSize);
-    totRowsProcessed *= rowSizeFactor;
-
-    CostScalar randIORowSizeFactor = 
-      optimizer_.scmRowSizeFactor(rowSize, ScanOptimizer::RAND_IO_ROWSIZE_FACTOR);
-    seeks *= randIORowSizeFactor;
-
-    CostScalar seqIORowSizeFactor = 
-      optimizer_.scmRowSizeFactor(rowSize, ScanOptimizer::SEQ_IO_ROWSIZE_FACTOR);
-    refinedSeqBlocksReadUpperBound *= seqIORowSizeFactor;
-
     CostScalar totalSeqKBRead = refinedSeqBlocksReadUpperBound *
       optimizer_.getIndexDesc()->getBlockSizeInKb();
 
+    // All the quantities we have calculated so far are from the standpoint of
+    // a single scan probe. So if we have multiple incoming scan probes (that is,
+    // if we are the inner table of a nested loop join), we need to multiply the
+    // cost by the number of incoming scan probes.
+    if (isMultipleScanProbes_)  // the "if" really isn't needed; but is useful for debug stops
+      {
+        totRowsProcessed *= incomingScanProbes_;
+        seeks *= incomingScanProbes_;
+        totalSeqKBRead *= incomingScanProbes_;
+      }
+
     scmCost_ = optimizer_.scmComputeMDAMCostForHbase(totRowsProcessed, seeks, 
                                                      totalSeqKBRead, incomingScanProbes_);
   }
@@ -10413,7 +10419,7 @@ void NewMDAMCostWA::computeDisjunct()
                                        singleSubsetSize_,
 				       disjunctIndex_);
 
-  prefixWA.compute(noExePreds_ /*out */);
+  prefixWA.compute(noExePreds_ /*out */, incomingScanProbes_);
   CollIndex stopColumn = prefixWA.getStopColumn();
   disjunctOptMDAMFetchRows_ = prefixWA.getOptMDAMFetchRows();
   disjunctOptMDAMProbeRows_ = prefixWA.getOptMDAMProbeRows();
@@ -10567,7 +10573,7 @@ NewMDAMOptimalDisjunctPrefixWA::~NewMDAMOptimalDisjunctPrefixWA()
 // key predicates, then the output parameter noExePreds will be
 // set to FALSE. Otherwise we leave it untouched.
 
-void NewMDAMOptimalDisjunctPrefixWA::compute(NABoolean & noExePreds /*out*/)
+void NewMDAMOptimalDisjunctPrefixWA::compute(NABoolean & noExePreds /*out*/,CostScalar & incomingScanProbes)
 {
   // Cumulative probe counters (MDAM is a recursive algorithm; this
   // represents the cost of recursion)
@@ -10702,9 +10708,19 @@ void NewMDAMOptimalDisjunctPrefixWA::compute(NABoolean & noExePreds /*out*/)
 
     // Calculate the number of fetch rows and fetch subsets for that column
 
+    // Note: Unfortunately, the histograms logic calculates the row counts
+    // from the standpoint of the result of a join while here we are trying
+    // to calculate from the standpoint of a single outer row. So, we divide
+    // the number of rows fetched (MDAMFetchRows) by the number of incoming
+    // scan probes. Note that for probes, we already took care of this by
+    // examining the actual predicates (e.g. if it is equality, we set the
+    // column UEC to one above).
+
     CostScalar MDAMFetchRows = disjunctHistograms_.getColStatsForColumn(
                    optimizer_.getIndexDesc()->
                    getIndexKey()[i]).getRowcount().getCeiling();
+    if (crossProductApplied_)
+      MDAMFetchRows = MDAMFetchRows / incomingScanProbes;
     CostScalar MDAMFetchSubsets = previousColumnMDAMProbeRows * MDAMIntervalEstimate;
 
     MDAM_DEBUG1(MTL2,"MDAM Fetch Rows on this column: %f",MDAMFetchRows.value());
@@ -10809,8 +10825,9 @@ void NewMDAMOptimalDisjunctPrefixWA::applyPredsToHistogram(const ValueIdSet * pr
           // estimate the cost factors (rows, uecs)
           // Therefore, apply the cross product to the disjunctHistograms only once.
           // Use the disjunctHistograms as normal after cross product is applied.
-          // ??? It seems that we need do the cross product even though there
-          // ??? is no pred on the current column. This is not being done?
+          // Note that this causes the row counts to be scaled up; our caller
+          // will have to compensate after this point by dividing by the number
+          // of incoming scan probes.
           crossProductApplied_ = TRUE;
           disjunctHistograms_.
                 applyPredicatesWhenMultipleProbes(


[2/2] trafodion git commit: Merge [TRAFODION-2645] Fixes to MDAM costing code for nested join cases

Posted by db...@apache.org.
Merge [TRAFODION-2645] Fixes to MDAM costing code for nested join cases


Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/087af70d
Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/087af70d
Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/087af70d

Branch: refs/heads/master
Commit: 087af70db947fcce6e32483fcaea4015a924186e
Parents: bd886c1 c008ebe
Author: Dave Birdsall <db...@apache.org>
Authored: Thu Feb 15 17:44:30 2018 +0000
Committer: Dave Birdsall <db...@apache.org>
Committed: Thu Feb 15 17:44:30 2018 +0000

----------------------------------------------------------------------
 core/sql/optimizer/ScanOptimizer.cpp | 85 ++++++++++++++++++-------------
 1 file changed, 51 insertions(+), 34 deletions(-)
----------------------------------------------------------------------