You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by su...@apache.org on 2015/07/20 22:12:35 UTC
[25/50] [abbrv] hive git commit: HIVE-11211 : Reset the fields in JoinStatsRule in StatsRulesProcFactory (Pengcheng Xiong, reviewed by Laljo John Pullokkaran)
HIVE-11211 : Reset the fields in JoinStatsRule in StatsRulesProcFactory (Pengcheng Xiong, reviewed by Laljo John Pullokkaran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/42326958
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/42326958
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/42326958
Branch: refs/heads/spark
Commit: 42326958148c2558be9c3d4dfe44c9e735704617
Parents: 4d984bd
Author: Hari Subramaniyan <ha...@apache.org>
Authored: Wed Jul 15 13:15:34 2015 -0700
Committer: Hari Subramaniyan <ha...@apache.org>
Committed: Wed Jul 15 13:15:34 2015 -0700
----------------------------------------------------------------------
.../stats/annotation/StatsRulesProcFactory.java | 42 ++++++++++----------
1 file changed, 22 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/42326958/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 0982059..376d42c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -1013,17 +1013,14 @@ public class StatsRulesProcFactory {
*/
public static class JoinStatsRule extends DefaultStatsRule implements NodeProcessor {
- private boolean pkfkInferred = false;
- private long newNumRows = 0;
- private List<Operator<? extends OperatorDesc>> parents;
- private CommonJoinOperator<? extends JoinDesc> jop;
- private int numAttr = 1;
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
- jop = (CommonJoinOperator<? extends JoinDesc>) nd;
- parents = jop.getParentOperators();
+ long newNumRows = 0;
+ CommonJoinOperator<? extends JoinDesc> jop = (CommonJoinOperator<? extends JoinDesc>) nd;
+ List<Operator<? extends OperatorDesc>> parents = jop.getParentOperators();
+ int numAttr = 1;
AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
HiveConf conf = aspCtx.getConf();
boolean allStatsAvail = true;
@@ -1062,7 +1059,7 @@ public class StatsRulesProcFactory {
numAttr = keyExprs.size();
// infer PK-FK relationship in single attribute join case
- inferPKFKRelationship();
+ long inferredRowCount = inferPKFKRelationship(numAttr, parents, jop);
// get the join keys from parent ReduceSink operators
for (int pos = 0; pos < parents.size(); pos++) {
ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos);
@@ -1149,7 +1146,7 @@ public class StatsRulesProcFactory {
// update join statistics
stats.setColumnStats(outColStats);
- long newRowCount = pkfkInferred ? newNumRows : computeNewRowCount(rowCounts, denom);
+ long newRowCount = inferredRowCount !=-1 ? inferredRowCount : computeNewRowCount(rowCounts, denom);
updateStatsForJoinType(stats, newRowCount, jop, rowCountParents);
jop.setStatistics(stats);
@@ -1180,7 +1177,7 @@ public class StatsRulesProcFactory {
}
long maxDataSize = parentSizes.get(maxRowIdx);
- long newNumRows = StatsUtils.safeMult(StatsUtils.safeMult(maxRowCount, (numParents - 1)), joinFactor);
+ newNumRows = StatsUtils.safeMult(StatsUtils.safeMult(maxRowCount, (numParents - 1)), joinFactor);
long newDataSize = StatsUtils.safeMult(StatsUtils.safeMult(maxDataSize, (numParents - 1)), joinFactor);
Statistics wcStats = new Statistics();
wcStats.setNumRows(newNumRows);
@@ -1195,15 +1192,17 @@ public class StatsRulesProcFactory {
return null;
}
- private void inferPKFKRelationship() {
+ private long inferPKFKRelationship(int numAttr, List<Operator<? extends OperatorDesc>> parents,
+ CommonJoinOperator<? extends JoinDesc> jop) {
+ long newNumRows = -1;
if (numAttr == 1) {
// If numAttr is 1, this means we join on one single key column.
Map<Integer, ColStatistics> parentsWithPK = getPrimaryKeyCandidates(parents);
// We only allow one single PK.
if (parentsWithPK.size() != 1) {
- LOG.debug("STATS-" + jop.toString() + ": detects multiple PK parents.");
- return;
+ LOG.debug("STATS-" + jop.toString() + ": detects none/multiple PK parents.");
+ return newNumRows;
}
Integer pkPos = parentsWithPK.keySet().iterator().next();
ColStatistics csPK = parentsWithPK.values().iterator().next();
@@ -1215,7 +1214,7 @@ public class StatsRulesProcFactory {
// csfKs.size() + 1 == parents.size() means we have a single PK and all
// the rest ops are FKs.
if (csFKs.size() + 1 == parents.size()) {
- getSelectivity(parents, pkPos, csPK, csFKs);
+ newNumRows = getCardinality(parents, pkPos, csPK, csFKs, jop);
// some debug information
if (isDebugEnabled) {
@@ -1236,16 +1235,17 @@ public class StatsRulesProcFactory {
}
}
}
+ return newNumRows;
}
/**
- * Get selectivity of reduce sink operators.
+ * Get cardinality of reduce sink operators.
* @param csPK - ColStatistics for a single primary key
* @param csFKs - ColStatistics for multiple foreign keys
*/
- private void getSelectivity(List<Operator<? extends OperatorDesc>> ops, Integer pkPos, ColStatistics csPK,
- Map<Integer, ColStatistics> csFKs) {
- this.pkfkInferred = true;
+ private long getCardinality(List<Operator<? extends OperatorDesc>> ops, Integer pkPos,
+ ColStatistics csPK, Map<Integer, ColStatistics> csFKs,
+ CommonJoinOperator<? extends JoinDesc> jop) {
double pkfkSelectivity = Double.MAX_VALUE;
int fkInd = -1;
// 1. We iterate through all the operators that have candidate FKs and
@@ -1290,13 +1290,15 @@ public class StatsRulesProcFactory {
distinctVals.add(csFK.getCountDistint());
}
}
+ long newNumRows;
if (csFKs.size() == 1) {
// there is only one FK
- this.newNumRows = newrows;
+ newNumRows = newrows;
} else {
// there is more than one FK
- this.newNumRows = this.computeNewRowCount(rowCounts, getDenominator(distinctVals));
+ newNumRows = this.computeNewRowCount(rowCounts, getDenominator(distinctVals));
}
+ return newNumRows;
}
private float getSelectivitySimpleTree(Operator<? extends OperatorDesc> op) {