You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2015/09/27 08:07:35 UTC

hive git commit: HIVE-11211 : Reset the fields in JoinStatsRule in StatsRulesProcFactory (Pengcheng Xiong, reviewed by Laljo John Pullokkaran)

Repository: hive
Updated Branches:
  refs/heads/branch-1.2 e7c16699f -> f428af1d2


HIVE-11211 : Reset the fields in JoinStatsRule in StatsRulesProcFactory (Pengcheng Xiong, reviewed by Laljo John Pullokkaran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f428af1d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f428af1d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f428af1d

Branch: refs/heads/branch-1.2
Commit: f428af1d2908588dd68eb30cde2f158bf9ef04c0
Parents: e7c1669
Author: Hari Subramaniyan <ha...@apache.org>
Authored: Wed Jul 15 13:15:34 2015 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Sat Sep 26 23:03:45 2015 -0700

----------------------------------------------------------------------
 .../stats/annotation/StatsRulesProcFactory.java | 42 ++++++++++----------
 1 file changed, 22 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/f428af1d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 0982059..376d42c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -1013,17 +1013,14 @@ public class StatsRulesProcFactory {
    */
   public static class JoinStatsRule extends DefaultStatsRule implements NodeProcessor {
 
-    private boolean pkfkInferred = false;
-    private long newNumRows = 0;
-    private List<Operator<? extends OperatorDesc>> parents;
-    private CommonJoinOperator<? extends JoinDesc> jop;
-    private int numAttr = 1;
 
     @Override
     public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         Object... nodeOutputs) throws SemanticException {
-      jop = (CommonJoinOperator<? extends JoinDesc>) nd;
-      parents = jop.getParentOperators();
+      long newNumRows = 0;
+      CommonJoinOperator<? extends JoinDesc> jop = (CommonJoinOperator<? extends JoinDesc>) nd;
+      List<Operator<? extends OperatorDesc>> parents = jop.getParentOperators();
+      int numAttr = 1;
       AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
       HiveConf conf = aspCtx.getConf();
       boolean allStatsAvail = true;
@@ -1062,7 +1059,7 @@ public class StatsRulesProcFactory {
           numAttr = keyExprs.size();
 
           // infer PK-FK relationship in single attribute join case
-          inferPKFKRelationship();
+          long inferredRowCount = inferPKFKRelationship(numAttr, parents, jop);
           // get the join keys from parent ReduceSink operators
           for (int pos = 0; pos < parents.size(); pos++) {
             ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos);
@@ -1149,7 +1146,7 @@ public class StatsRulesProcFactory {
 
           // update join statistics
           stats.setColumnStats(outColStats);
-          long newRowCount = pkfkInferred ? newNumRows : computeNewRowCount(rowCounts, denom);
+          long newRowCount = inferredRowCount !=-1 ? inferredRowCount : computeNewRowCount(rowCounts, denom);
           updateStatsForJoinType(stats, newRowCount, jop, rowCountParents);
           jop.setStatistics(stats);
 
@@ -1180,7 +1177,7 @@ public class StatsRulesProcFactory {
           }
 
           long maxDataSize = parentSizes.get(maxRowIdx);
-          long newNumRows = StatsUtils.safeMult(StatsUtils.safeMult(maxRowCount, (numParents - 1)), joinFactor);
+          newNumRows = StatsUtils.safeMult(StatsUtils.safeMult(maxRowCount, (numParents - 1)), joinFactor);
           long newDataSize = StatsUtils.safeMult(StatsUtils.safeMult(maxDataSize, (numParents - 1)), joinFactor);
           Statistics wcStats = new Statistics();
           wcStats.setNumRows(newNumRows);
@@ -1195,15 +1192,17 @@ public class StatsRulesProcFactory {
       return null;
     }
 
-    private void inferPKFKRelationship() {
+    private long inferPKFKRelationship(int numAttr, List<Operator<? extends OperatorDesc>> parents,
+        CommonJoinOperator<? extends JoinDesc> jop) {
+      long newNumRows = -1;
       if (numAttr == 1) {
         // If numAttr is 1, this means we join on one single key column.
         Map<Integer, ColStatistics> parentsWithPK = getPrimaryKeyCandidates(parents);
 
         // We only allow one single PK.
         if (parentsWithPK.size() != 1) {
-          LOG.debug("STATS-" + jop.toString() + ": detects multiple PK parents.");
-          return;
+          LOG.debug("STATS-" + jop.toString() + ": detects none/multiple PK parents.");
+          return newNumRows;
         }
         Integer pkPos = parentsWithPK.keySet().iterator().next();
         ColStatistics csPK = parentsWithPK.values().iterator().next();
@@ -1215,7 +1214,7 @@ public class StatsRulesProcFactory {
         // csfKs.size() + 1 == parents.size() means we have a single PK and all
         // the rest ops are FKs.
         if (csFKs.size() + 1 == parents.size()) {
-          getSelectivity(parents, pkPos, csPK, csFKs);
+          newNumRows = getCardinality(parents, pkPos, csPK, csFKs, jop);
 
           // some debug information
           if (isDebugEnabled) {
@@ -1236,16 +1235,17 @@ public class StatsRulesProcFactory {
           }
         }
       }
+      return newNumRows;
     }
 
     /**
-     * Get selectivity of reduce sink operators.
+     * Get the join's estimated row count inferred from the PK-FK relationship of the reduce sink operators.
      * @param csPK - ColStatistics for a single primary key
      * @param csFKs - ColStatistics for multiple foreign keys
      */
-    private void getSelectivity(List<Operator<? extends OperatorDesc>> ops, Integer pkPos, ColStatistics csPK,
-        Map<Integer, ColStatistics> csFKs) {
-      this.pkfkInferred = true;
+    private long getCardinality(List<Operator<? extends OperatorDesc>> ops, Integer pkPos,
+        ColStatistics csPK, Map<Integer, ColStatistics> csFKs,
+        CommonJoinOperator<? extends JoinDesc> jop) {
       double pkfkSelectivity = Double.MAX_VALUE;
       int fkInd = -1;
       // 1. We iterate through all the operators that have candidate FKs and
@@ -1290,13 +1290,15 @@ public class StatsRulesProcFactory {
           distinctVals.add(csFK.getCountDistint());
         }
       }
+      long newNumRows;
       if (csFKs.size() == 1) {
         // there is only one FK
-        this.newNumRows = newrows;
+        newNumRows = newrows;
       } else {
         // there is more than one FK
-        this.newNumRows = this.computeNewRowCount(rowCounts, getDenominator(distinctVals));
+        newNumRows = this.computeNewRowCount(rowCounts, getDenominator(distinctVals));
       }
+      return newNumRows;
     }
 
     private float getSelectivitySimpleTree(Operator<? extends OperatorDesc> op) {