You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by gp...@apache.org on 2019/05/09 06:29:08 UTC
[drill] branch master updated: DRILL-7227: Fix predicate check in
DrillRelOptUtil.analyzeSimpleEquiJoin
This is an automated email from the ASF dual-hosted git repository.
gparai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new b774eec DRILL-7227: Fix predicate check in DrillRelOptUtil.analyzeSimpleEquiJoin
b774eec is described below
commit b774eec8cc63bd49f638fdf743cf759ee918d50d
Author: Gautam Parai <gp...@maprtech.com>
AuthorDate: Tue Apr 30 14:00:16 2019 -0700
DRILL-7227: Fix predicate check in DrillRelOptUtil.analyzeSimpleEquiJoin
closes #1775
---
.../drill/exec/planner/common/DrillRelOptUtil.java | 10 +++-
.../planner/cost/DrillRelMdDistinctRowCount.java | 64 +++++++++++++++-------
.../org/apache/drill/exec/sql/TestAnalyze.java | 2 +-
3 files changed, 53 insertions(+), 23 deletions(-)
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java
index 3838bf9..82e406a 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java
@@ -678,10 +678,16 @@ public abstract class DrillRelOptUtil {
super.visitCall(call);
} else {
if (call.getKind() == SqlKind.EQUALS) {
- int leftFieldCount = join.getLeft().getRowType().getFieldCount();
- int rightFieldCount = join.getRight().getRowType().getFieldCount();
RexNode leftComparand = call.operands.get(0);
RexNode rightComparand = call.operands.get(1);
+ // If a join condition predicate has something more complicated than a RexInputRef
+ // we bail out!
+ if (!(leftComparand instanceof RexInputRef && rightComparand instanceof RexInputRef)) {
+ joinConditions.clear();
+ throw new Util.FoundOne(call);
+ }
+ int leftFieldCount = join.getLeft().getRowType().getFieldCount();
+ int rightFieldCount = join.getRight().getRowType().getFieldCount();
RexInputRef leftFieldAccess = (RexInputRef) leftComparand;
RexInputRef rightFieldAccess = (RexInputRef) rightComparand;
if (leftFieldAccess.getIndex() >= leftFieldCount + rightFieldCount ||
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java
index 8b11a9a..ae62449 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdDistinctRowCount.java
@@ -55,8 +55,12 @@ import org.apache.drill.exec.util.Utilities;
import org.apache.drill.metastore.ColumnStatistics;
import org.apache.drill.metastore.ColumnStatisticsKind;
import org.apache.drill.metastore.TableMetadata;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount{
+ private static final Logger logger = LoggerFactory.getLogger(DrillRelMdDistinctRowCount.class);
+
private static final DrillRelMdDistinctRowCount INSTANCE =
new DrillRelMdDistinctRowCount();
@@ -142,10 +146,7 @@ public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount{
if (groupKey.length() == 0) {
return selectivity * rowCount;
}
- /* If predicate is present, determine its selectivity to estimate filtered rows. Thereafter,
- * compute the number of distinct rows
- */
- selectivity = mq.getSelectivity(scan, predicate);
+
TableMetadata tableMetadata;
try {
tableMetadata = table.getGroupScan().getTableMetadata();
@@ -154,38 +155,43 @@ public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount{
return scan.estimateRowCount(mq) * 0.1;
}
- double s = 1.0;
- boolean allCols = true;
+ double estRowCnt = 1.0;
+ String colName = "";
+ boolean allColsHaveNDV = true;
for (int i = 0; i < groupKey.length(); i++) {
- final String colName = type.getFieldNames().get(i);
- // Skip NDV, if not available
+ colName = type.getFieldNames().get(i);
if (!groupKey.get(i)) {
- allCols = false;
- break;
+ continue;
}
ColumnStatistics columnStatistics = tableMetadata != null ?
tableMetadata.getColumnStatistics(SchemaPath.getSimplePath(colName)) : null;
Double ndv = columnStatistics != null ? (Double) columnStatistics.getStatistic(ColumnStatisticsKind.NDV) : null;
+ // Skip NDV, if not available
if (ndv == null) {
- continue;
+ allColsHaveNDV = false;
+ break;
}
- s *= ndv;
+ estRowCnt *= ndv;
selectivity = getPredSelectivityContainingInputRef(predicate, i, mq, scan);
/* If predicate is on group-by column, scale down the NDV by selectivity. Consider the query
* select a, b from t where a = 10 group by a, b. Here, NDV(a) will be scaled down by SEL(a)
* whereas NDV(b) will not.
*/
if (selectivity > 0) {
- s *= selectivity;
+ estRowCnt *= selectivity;
}
}
- s = Math.min(s, rowCount);
- if (!allCols) {
+ estRowCnt = Math.min(estRowCnt, rowCount);
+ if (!allColsHaveNDV) {
+ if (logger.isDebugEnabled()) {
+ logger.debug(String.format("NDV not available for %s(%s). Using default rowcount for group-by %s",
+ (tableMetadata != null ? tableMetadata.getTableName() : ""), colName, groupKey.toString()));
+ }
// Could not get any NDV estimate from stats - probably stats not present for GBY cols. So Guess!
return scan.estimateRowCount(mq) * 0.1;
} else {
/* rowCount maybe less than NDV(different source), sanity check OR NDV not used at all */
- return s;
+ return estRowCnt;
}
}
@@ -239,18 +245,28 @@ public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount{
if (groupKey.get(idx)) {
// GBY key is present in some filter - now try options A) and B) as described above
double ndvSGby = Double.MAX_VALUE;
+ Double ndv;
boolean presentInFilter = false;
ImmutableBitSet sGby = getSingleGbyKey(groupKey, idx);
if (sGby != null) {
+ // If we see any NULL ndv i.e. cant process ..we bail out!
for (ImmutableBitSet jFilter : joinFiltersSet) {
if (jFilter.contains(sGby)) {
presentInFilter = true;
// Found join condition containing this GBY key. Pick min NDV across all columns in this join
for (int fidx : jFilter) {
if (fidx < left.getRowType().getFieldCount()) {
- ndvSGby = Math.min(ndvSGby, mq.getDistinctRowCount(left, ImmutableBitSet.of(fidx), leftPred));
+ ndv = mq.getDistinctRowCount(left, ImmutableBitSet.of(fidx), leftPred);
+ if (ndv == null) {
+ return super.getDistinctRowCount(joinRel, mq, groupKey, predicate);
+ }
+ ndvSGby = Math.min(ndvSGby, ndv);
} else {
- ndvSGby = Math.min(ndvSGby, mq.getDistinctRowCount(right, ImmutableBitSet.of(fidx-left.getRowType().getFieldCount()), rightPred));
+ ndv = mq.getDistinctRowCount(right, ImmutableBitSet.of(fidx-left.getRowType().getFieldCount()), rightPred);
+ if (ndv == null) {
+ return super.getDistinctRowCount(joinRel, mq, groupKey, predicate);
+ }
+ ndvSGby = Math.min(ndvSGby, ndv);
}
}
break;
@@ -260,9 +276,17 @@ public class DrillRelMdDistinctRowCount extends RelMdDistinctRowCount{
if (!presentInFilter) {
for (int sidx : sGby) {
if (sidx < left.getRowType().getFieldCount()) {
- ndvSGby = mq.getDistinctRowCount(left, ImmutableBitSet.of(sidx), leftPred);
+ ndv = mq.getDistinctRowCount(left, ImmutableBitSet.of(sidx), leftPred);
+ if (ndv == null) {
+ return super.getDistinctRowCount(joinRel, mq, groupKey, predicate);
+ }
+ ndvSGby = ndv;
} else {
- ndvSGby = mq.getDistinctRowCount(right, ImmutableBitSet.of(sidx-left.getRowType().getFieldCount()), rightPred);
+ ndv = mq.getDistinctRowCount(right, ImmutableBitSet.of(sidx-left.getRowType().getFieldCount()), rightPred);
+ if (ndv == null) {
+ return super.getDistinctRowCount(joinRel, mq, groupKey, predicate);
+ }
+ ndvSGby = ndv;
}
}
}
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestAnalyze.java b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestAnalyze.java
index 1d404e1..055c8d5 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestAnalyze.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestAnalyze.java
@@ -290,7 +290,7 @@ public class TestAnalyze extends BaseTestQuery {
query = " select emp.employee_id from dfs.tmp.employeeUseStat emp join dfs.tmp.departmentUseStat dept"
+ " on emp.department_id = dept.department_id "
+ " group by emp.employee_id";
- String[] expectedPlan8 = {"HashAgg\\(group=\\[\\{0\\}\\]\\).*rowcount = 115.49475630811243,.*",
+ String[] expectedPlan8 = {"HashAgg\\(group=\\[\\{0\\}\\]\\).*rowcount = 730.0992454469841,.*",
"HashJoin\\(condition.*\\).*rowcount = 1155.0,.*",
"Scan.*columns=\\[`department_id`, `employee_id`\\].*rowcount = 1155.0.*",
"Scan.*columns=\\[`department_id`\\].*rowcount = 12.0.*"};